diff --git a/delta/plugins/graph-analytics/src/main/resources/elasticsearch/mappings.json b/delta/plugins/graph-analytics/src/main/resources/elasticsearch/mappings.json index 7b8169aeca..3114f49196 100644 --- a/delta/plugins/graph-analytics/src/main/resources/elasticsearch/mappings.json +++ b/delta/plugins/graph-analytics/src/main/resources/elasticsearch/mappings.json @@ -12,6 +12,9 @@ "_rev": { "type": "long" }, + "_deprecated": { + "type": "boolean" + }, "_createdAt": { "type": "date" }, diff --git a/delta/plugins/graph-analytics/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsResult.scala b/delta/plugins/graph-analytics/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsResult.scala index b998a20114..f143b6b176 100644 --- a/delta/plugins/graph-analytics/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsResult.scala +++ b/delta/plugins/graph-analytics/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsResult.scala @@ -7,7 +7,7 @@ import io.circe.{Encoder, Json} import ch.epfl.bluebrain.nexus.delta.sdk.syntax._ import ch.epfl.bluebrain.nexus.delta.sourcing.model.Identity.Subject import ch.epfl.bluebrain.nexus.delta.sourcing.model.ProjectRef -import io.circe.syntax.EncoderOps +import io.circe.syntax.{EncoderOps, KeyOps} import java.time.Instant @@ -38,38 +38,63 @@ object GraphAnalyticsResult { * The value is indexed to Elasticsearch and an update by query action is required so that the type of this resource * is propagated to the resources pointing to it. * - * The other fields are metadata and match the same definition as in [[ResourceState]]. - * - * @see - * [[ResouceState]] + * The other fields are resource metadata. */ - final case class Index( + final case class Index private ( project: ProjectRef, id: Iri, rev: Int, + deprecated: Boolean, types: Set[Iri], createdAt: Instant, createdBy: Subject, updatedAt: Instant, updatedBy: Subject, - value: JsonLdDocument + value: Option[JsonLdDocument] ) extends GraphAnalyticsResult object Index { - implicit val encoder: Encoder[Index] = Encoder.instance { g => + + def active( + project: ProjectRef, + id: Iri, + rev: Int, + types: Set[Iri], + createdAt: Instant, + createdBy: Subject, + updatedAt: Instant, + updatedBy: Subject, + value: JsonLdDocument + ) = + new Index(project, id, rev, false, types, createdAt, createdBy, updatedAt, updatedBy, Some(value)) + + def deprecated( + project: ProjectRef, + id: Iri, + rev: Int, + types: Set[Iri], + createdAt: Instant, + createdBy: Subject, + updatedAt: Instant, + updatedBy: Subject + ) = + new Index(project, id, rev, true, types, createdAt, createdBy, updatedAt, updatedBy, None) + + implicit val encoder: Encoder[Index] = Encoder.instance { i => import ch.epfl.bluebrain.nexus.delta.sourcing.model.Identity.Database._ Json .obj( - keywords.id -> g.id.asJson, - "_project" -> g.project.asJson, - "_rev" -> g.rev.asJson, - "_createdAt" -> g.createdAt.asJson, - "_createdBy" -> g.createdBy.asJson, - "_updatedAt" -> g.updatedAt.asJson, - "_updatedBy" -> g.updatedBy.asJson + keywords.id := i.id, + "_project" := i.project, + "_rev" := i.rev, + "_deprecated" := i.deprecated, + "_createdAt" := i.createdAt, + "_createdBy" := i.createdBy, + "_updatedAt" := i.updatedAt, + "_updatedBy" := i.updatedBy ) - .addIfNonEmpty(keywords.tpe, g.types) - .deepMerge(g.value.asJson) + .addIfNonEmpty(keywords.tpe, i.types) + .deepMerge(i.value.map(_.asJson).getOrElse(Json.obj())) } } diff --git a/delta/plugins/graph-analytics/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsStream.scala b/delta/plugins/graph-analytics/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsStream.scala index 4d3c11ed31..88b3972924 100644 --- a/delta/plugins/graph-analytics/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsStream.scala +++ b/delta/plugins/graph-analytics/src/main/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsStream.scala @@ -91,10 +91,15 @@ object GraphAnalyticsStream { UpdateByQuery(s.id, s.types) } case Resources.entityType => - Task.fromEither(ResourceState.serializer.codec.decodeJson(json)).flatMap { s => - JsonLdDocument.fromExpanded(s.expanded, findRelationships(project, xas, relationshipBatch)).map { d => - Index(s.project, s.id, s.rev, s.types, s.createdAt, s.createdBy, s.updatedAt, s.updatedBy, d) - } + Task.fromEither(ResourceState.serializer.codec.decodeJson(json)).flatMap { + case s if s.deprecated => + Task.pure( + Index.deprecated(s.project, s.id, s.rev, s.types, s.createdAt, s.createdBy, s.updatedAt, s.updatedBy) + ) + case s => + JsonLdDocument.fromExpanded(s.expanded, findRelationships(project, xas, relationshipBatch)).map { d => + Index.active(s.project, s.id, s.rev, s.types, s.createdAt, s.createdBy, s.updatedAt, s.updatedBy, d) + } } case _ => Task.pure(Noop) } diff --git a/delta/plugins/graph-analytics/src/test/resources/result/resource1.json b/delta/plugins/graph-analytics/src/test/resources/result/resource1.json index bcf10acecf..49d47d28e5 100644 --- a/delta/plugins/graph-analytics/src/test/resources/result/resource1.json +++ b/delta/plugins/graph-analytics/src/test/resources/result/resource1.json @@ -7,6 +7,7 @@ ], "_project": "myorg/myproject", "_rev": 1, + "_deprecated": false, "_createdAt": "1970-01-01T00:00:00Z", "_createdBy": { "@type": "Anonymous" diff --git a/delta/plugins/graph-analytics/src/test/resources/result/resource1_updated.json b/delta/plugins/graph-analytics/src/test/resources/result/resource1_updated.json index 98a46eab98..994d6379fa 100644 --- a/delta/plugins/graph-analytics/src/test/resources/result/resource1_updated.json +++ b/delta/plugins/graph-analytics/src/test/resources/result/resource1_updated.json @@ -7,6 +7,7 @@ ], "_project": "myorg/myproject", "_rev": 1, + "_deprecated": false, "_createdAt": "1970-01-01T00:00:00Z", "_createdBy": { "@type": "Anonymous" diff --git a/delta/plugins/graph-analytics/src/test/resources/result/resource2.json b/delta/plugins/graph-analytics/src/test/resources/result/resource2.json index f4ddde87c8..29092e2466 100644 --- a/delta/plugins/graph-analytics/src/test/resources/result/resource2.json +++ b/delta/plugins/graph-analytics/src/test/resources/result/resource2.json @@ -9,6 +9,7 @@ ], "_project": "myorg/myproject", "_rev": 1, + "_deprecated": false, "_createdAt": "1970-01-01T00:00:00Z", "_createdBy": { "@type": "Anonymous" diff --git a/delta/plugins/graph-analytics/src/test/resources/result/resource_deprecated.json b/delta/plugins/graph-analytics/src/test/resources/result/resource_deprecated.json new file mode 100644 index 0000000000..3b4097b727 --- /dev/null +++ b/delta/plugins/graph-analytics/src/test/resources/result/resource_deprecated.json @@ -0,0 +1,18 @@ +{ + "@type": [ + "http://schema.org/Dataset", + "https://neuroshapes.org/NeuroMorphology" + ], + "@id": "http://localhost/deprecated", + "_project": "myorg/myproject", + "_rev": 1, + "_deprecated": true, + "_createdAt": "1970-01-01T00:00:00Z", + "_createdBy": { + "@type": "Anonymous" + }, + "_updatedAt": "1970-01-01T00:00:00Z", + "_updatedBy": { + "@type": "Anonymous" + } +} \ No newline at end of file diff --git a/delta/plugins/graph-analytics/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsSinkSuite.scala b/delta/plugins/graph-analytics/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsSinkSuite.scala index 4ef5022364..f70f379fde 100644 --- a/delta/plugins/graph-analytics/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsSinkSuite.scala +++ b/delta/plugins/graph-analytics/src/test/scala/ch/epfl/bluebrain/nexus/delta/plugins/graph/analytics/indexing/GraphAnalyticsSinkSuite.scala @@ -2,6 +2,7 @@ package ch.epfl.bluebrain.nexus.delta.plugins.graph.analytics.indexing import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.ElasticSearchClientSetup import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.client.IndexLabel +import ch.epfl.bluebrain.nexus.delta.plugins.graph.analytics.indexing.GraphAnalyticsResult.Index import ch.epfl.bluebrain.nexus.delta.plugins.graph.analytics.model.JsonLdDocument import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.nxvFile import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode.Iri @@ -36,7 +37,7 @@ class GraphAnalyticsSinkSuite private val index = IndexLabel.unsafe("test_analytics") - private lazy val sink = new GraphAnalyticsSink(client, 2, 100.millis, index) + private lazy val sink = new GraphAnalyticsSink(client, 5, 100.millis, index) private val project = ProjectRef.unsafe("myorg", "myproject") @@ -50,6 +51,11 @@ class GraphAnalyticsSinkSuite private val resource2 = iri"http://localhost/resource2" private val expanded2 = loadExpanded("expanded/resource2.json") + // Deprecated resource + private val deprecatedResource = iri"http://localhost/deprecated" + private val deprecatedResourceTypes = + Set(iri"http://schema.org/Dataset", iri"https://neuroshapes.org/NeuroMorphology") + // Resource linked by 'resource1', resolved while indexing private val resource3 = iri"http://localhost/resource3" // File linked by 'resource1', resolved after an update by query @@ -90,33 +96,39 @@ class GraphAnalyticsSinkSuite SuccessElem(Resources.entityType, id, Some(project), Instant.EPOCH, Offset.start, result, 1) test("Push index results") { - def toIndex(id: Iri, io: UIO[ExpandedJsonLd]) = { + def indexActive(id: Iri, io: UIO[ExpandedJsonLd]) = { for { expanded <- io types <- getTypes(expanded) doc <- JsonLdDocument.fromExpanded(expanded, _ => findRelationships) } yield { - val result = - GraphAnalyticsResult.Index(project, id, 1, types, Instant.EPOCH, Anonymous, Instant.EPOCH, Anonymous, doc) + val result = Index.active(project, id, 1, types, Instant.EPOCH, Anonymous, Instant.EPOCH, Anonymous, doc) success(id, result) } } + def indexDeprecated(id: Iri, types: Set[Iri]) = + success(id, Index.deprecated(project, id, 1, types, Instant.EPOCH, Anonymous, Instant.EPOCH, Anonymous)) + for { - r1 <- toIndex(resource1, expanded1) - r2 <- toIndex(resource2, expanded2) - r3 = success(resource3, GraphAnalyticsResult.Noop) - chunk = Chunk.seq(List(r1, r2, r3)) + active1 <- indexActive(resource1, expanded1) + active2 <- indexActive(resource2, expanded2) + discarded = success(resource3, GraphAnalyticsResult.Noop) + deprecated = indexDeprecated(deprecatedResource, deprecatedResourceTypes) + chunk = Chunk.seq(List(active1, active2, discarded, deprecated)) // We expect no error - _ <- sink(chunk).assert(chunk.map(_.void)) - // 2 documents should have been indexed correctly: + _ <- sink(chunk).assert(chunk.map(_.void)) + // 3 documents should have been indexed correctly: // - `resource1` with the relationship to `resource3` resolved // - `resource2` with no reference resolved - _ <- client.count(index.value).eventually(2L) - expected1 <- ioJsonContentOf("result/resource1.json") - expected2 <- ioJsonContentOf("result/resource2.json") - _ <- client.getSource[Json](index, resource1.toString).eventually(expected1) - _ <- client.getSource[Json](index, resource2.toString).eventually(expected2) + // - `deprecatedResource` with only metadata, resolution is skipped + _ <- client.count(index.value).eventually(3L) + expected1 <- ioJsonContentOf("result/resource1.json") + expected2 <- ioJsonContentOf("result/resource2.json") + expectedDeprecated <- ioJsonContentOf("result/resource_deprecated.json") + _ <- client.getSource[Json](index, resource1.toString).eventually(expected1) + _ <- client.getSource[Json](index, resource2.toString).eventually(expected2) + _ <- client.getSource[Json](index, deprecatedResource.toString).eventually(expectedDeprecated) } yield () } @@ -145,7 +157,7 @@ class GraphAnalyticsSinkSuite _ <- client.refresh(index) expected1 <- ioJsonContentOf("result/resource1_updated.json") expected2 <- ioJsonContentOf("result/resource2.json") - _ <- client.count(index.value).eventually(2L) + _ <- client.count(index.value).eventually(3L) _ <- client.getSource[Json](index, resource1.toString).eventually(expected1) _ <- client.getSource[Json](index, resource2.toString).eventually(expected2) } yield () diff --git a/docs/src/main/paradox/docs/delta/api/graph-analytics-api.md b/docs/src/main/paradox/docs/delta/api/graph-analytics-api.md index 2250426455..78dc16abf5 100644 --- a/docs/src/main/paradox/docs/delta/api/graph-analytics-api.md +++ b/docs/src/main/paradox/docs/delta/api/graph-analytics-api.md @@ -1,8 +1,12 @@ # Graph analytics -Graph analytics are a functionality introduced by the `graph-analytics` plugin and rooted in the `/v1/graph-analytics/{org_label}/{project_label}` collection. -They provide ways to get insights about the data and their relationships in terms of types (@type field). - +Graph analytics is a feature introduced by the `graph-analytics` plugin and rooted in the `/v1/graph-analytics/{org_label}/{project_label}` collection. + +It runs for each project and it parses and breaks down non-deprecated resources to analyse their structure. +For each of these resources, it extracts the following information: + +* Its properties: their path and the type of the associated value +* Its relationships, that is to say the other resources in the same project it points to. @@@ note { .tip title="Authorization notes" } @@ -90,7 +94,13 @@ An example of the ElasticSearch Document looks as follows: { "@id": "http://example.com/Anna", "@type": "http://schema.org/Person", + "_project": "myorg/myproject", "_rev": 4, + "_deprecated": false, + "_createdAt": "2023-06-01T00:00:00Z", + "_createdBy": { "@type": "User", "realm": "bbp", "subject": "Bob" }, + "_updatedAt": "2023-06-12T00:00:00Z", + "_updatedBy": { "@type": "User", "realm": "bbp", "subject": "Alice" }, "properties": [ { "dataType": "object", @@ -132,7 +142,7 @@ An example of the ElasticSearch Document looks as follows: "references": [ { "found": true, - "@id": "http://example.com/Robert", + "@id": "http://example.com/Robert" } ], "relationships": [