Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(rel2.0): default removal option to REMOVE_ALL_EDGES_FROM_SOURCE #478

Merged
merged 7 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ project.ext.externalDependency = [
'mockito': 'org.mockito:mockito-core:4.11.0',
'mockitoInline': 'org.mockito:mockito-inline:3.11.2',
'mysql': 'mysql:mysql-connector-java:8.0.29',
'neo4jHarness': 'org.neo4j.test:neo4j-harness:3.4.11',
'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:4.0.0',
'parseqTest': 'com.linkedin.parseq:parseq-test-api:5.1.20',
'postgresql': 'org.postgresql:postgresql:42.2.14',
'reflections': 'org.reflections:reflections:0.9.11',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import com.linkedin.metadata.dao.internal.BaseGraphWriterDAO;
import java.util.List;
import javax.annotation.Nonnull;
import lombok.Value;
import lombok.AllArgsConstructor;
import lombok.Data;


/**
Expand All @@ -15,7 +16,8 @@ public abstract class BaseLocalRelationshipBuilder<ASPECT extends RecordTemplate

private final Class<ASPECT> _aspectClass;

@Value
@AllArgsConstructor
@Data
public static class LocalRelationshipUpdates<RELATIONSHIP extends RecordTemplate> {
List<RELATIONSHIP> relationships;
Class<RELATIONSHIP> relationshipClass;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
*/
public abstract class BaseGraphWriterDAO {

// These removal options are deprecated; all relationship ingestion uses REMOVE_ALL_EDGES_FROM_SOURCE.
public enum RemovalOption {
REMOVE_NONE,
REMOVE_ALL_EDGES_FROM_SOURCE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import com.linkedin.common.urn.Urn;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.metadata.dao.internal.BaseGraphWriterDAO;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
Expand All @@ -11,37 +10,34 @@


public class GraphUtils {
private static final String SOURCE = "source";
private GraphUtils() {
// Util class
}

/**
* Check if a group relationship shares the same source urn, destination urn or both based on the remove option.
* Check if a group relationship shares the same source urn.
* @param relationships list of relationships
* @param removalOption removal option to specify which relationships to be removed
* @param sourceField name of the source field
* @param destinationField name of the destination field
* @param urn source urn to compare. Optional for V1. Needed for V2.
* @param assetUrn source urn to compare. Optional for V1. Needed for V2.
*/
public static void checkSameUrn(@Nonnull final List<? extends RecordTemplate> relationships,
@Nonnull final BaseGraphWriterDAO.RemovalOption removalOption, @Nonnull final String sourceField,
@Nonnull final String destinationField, @Nullable Urn urn) {

public static void checkSameSourceUrn(@Nonnull final List<? extends RecordTemplate> relationships, @Nullable Urn assetUrn) {
if (relationships.isEmpty()) {
return;
}

final Urn sourceUrn = getSourceUrnBasedOnRelationshipVersion(relationships.get(0), urn);
final Urn destinationUrn = getDestinationUrnFromRelationship(relationships.get(0));

if (removalOption == BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE) {
checkSameUrn(relationships, sourceField, sourceUrn);
} else if (removalOption == BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_TO_DESTINATION) {
checkSameUrn(relationships, destinationField, destinationUrn);
} else if (removalOption == BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE_TO_DESTINATION) {
checkSameUrn(relationships, sourceField, sourceUrn);
checkSameUrn(relationships, destinationField, destinationUrn);
for (RecordTemplate relationship : relationships) {
if (ModelUtils.isRelationshipInV2(relationship.schema())) {
if (assetUrn == null) {
throw new IllegalArgumentException("Something went wrong. The asset urn is missing which is required during "
+ "ingestion of a model 2.0 relationship. Relationship model: " + relationship);
}
// Skip source urn check for V2 relationships since they don't have source field
} else {
Urn compare = assetUrn == null ? getSourceUrnFromRelationship(relationships.get(0)) : assetUrn;
Urn source = getSourceUrnFromRelationship(relationship);
if (!compare.equals(source)) {
throw new IllegalArgumentException(
String.format("Relationships have different source urns. Urn being compared to: %s, Relationship source: %s", compare, source));
}
}
}
}

Expand All @@ -67,24 +63,4 @@ public static <RELATIONSHIP extends RecordTemplate> Urn getSourceUrnBasedOnRelat
}
return sourceUrn;
}

public static void checkSameUrn(@Nonnull final List<? extends RecordTemplate> relationships,
@Nonnull final BaseGraphWriterDAO.RemovalOption removalOption, @Nonnull final String sourceField,
@Nonnull final String destinationField) {
checkSameUrn(relationships, removalOption, sourceField, destinationField, null);
}

private static void checkSameUrn(@Nonnull List<? extends RecordTemplate> records, @Nonnull String field,
@Nonnull Urn compare) {
for (RecordTemplate relation : records) {
if (ModelUtils.isRelationshipInV2(relation.schema()) && field.equals(SOURCE)) {
// Skip source urn check for V2 relationships since they don't have source field
// ToDo: enhance the source check for V2 relationships
return;
}
if (!compare.equals(ModelUtils.getUrnFromRelationship(relation, field))) {
throw new IllegalArgumentException("Records have different " + field + " urn");
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.linkedin.metadata.dao.utils;

import com.linkedin.data.template.RecordTemplate;
import com.linkedin.metadata.dao.internal.BaseGraphWriterDAO;
import com.linkedin.testing.RelationshipBar;
import com.linkedin.testing.RelationshipFoo;
import com.linkedin.testing.RelationshipV2Bar;
Expand All @@ -19,15 +18,14 @@
public class GraphUtilsTest {

@Test
public void testCheckSameUrnWithEmptyRelationships() {
public void testCheckSameSourceUrnWithEmptyRelationships() {
List<RecordTemplate> relationships = Collections.emptyList();
GraphUtils.checkSameUrn(relationships, BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE, "source", "destination");
GraphUtils.checkSameUrn(relationships, BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_TO_DESTINATION, "source", "destination", new BarUrn(1));
GraphUtils.checkSameSourceUrn(relationships, null);
// No exception should be thrown
}

@Test
public void testCheckSameUrnWithSameSourceUrn() {
public void testCheckSameSourceUrnWithSameSourceUrn() throws URISyntaxException {
// Test cases for relationship V1
RelationshipFoo relationship;
try {
Expand All @@ -37,41 +35,46 @@ public void testCheckSameUrnWithSameSourceUrn() {
}

List<RecordTemplate> relationships = Lists.newArrayList(relationship, relationship);
// when the assetUrn is not provided for relationship v1, use the first relationship's source to compare instead
try {
GraphUtils.checkSameUrn(relationships, BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE, "source", "destination");
GraphUtils.checkSameSourceUrn(relationships, null);
} catch (IllegalArgumentException e) {
fail("Expected no IllegalArgumentException to be thrown, but got: " + e.getMessage());
}

// when urn is provided for relationship V1, the provided urn should be ignored
// when the assetUrn is provided for relationship V1, check all relationships to see if they have the same source
try {
GraphUtils.checkSameUrn(relationships, BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE, "source", "destination", new BarUrn(10));
GraphUtils.checkSameSourceUrn(relationships, new FooUrn(1));
} catch (IllegalArgumentException e) {
fail("Expected no IllegalArgumentException to be thrown, but got: " + e.getMessage());
}

// given an assetUrn that is not the same as the source urns of the relationships, throw an exception
try {
GraphUtils.checkSameSourceUrn(relationships, new BarUrn(10));
fail("Expected an IllegalArgumentException to be thrown, but it wasn't");
} catch (IllegalArgumentException e) {
// do nothing
}

// Test cases for relationship V2
RelationshipV2Bar relationshipV2 = mockRelationshipV2Bar(new BarUrn(2));
List<RecordTemplate> relationshipsV2 = Lists.newArrayList(relationshipV2, relationshipV2);
// when urn is provided for relationship V2, the check should pass
try {
GraphUtils.checkSameUrn(relationshipsV2, BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE, "source", "destination", new BarUrn(2));
GraphUtils.checkSameSourceUrn(relationshipsV2, new BarUrn(2));
} catch (IllegalArgumentException e) {
fail("Expected no IllegalArgumentException to be thrown, but got: " + e.getMessage());
}
// when urn is not provided for relationship V2, it should throw IllegalArgumentException
assertThrows(IllegalArgumentException.class,
() -> GraphUtils.checkSameUrn(
relationshipsV2,
BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE,
"source",
"destination")
() -> GraphUtils.checkSameSourceUrn(relationshipsV2, null)
);
}

@Test
public void testCheckSameUrnWithDifferentSourceUrn() {
// Test cases for relationship V1
public void testCheckSameSourceUrnWithDifferentSourceUrn() {
// Test cases for relationship V1 (not applicable to relationship v2)
RecordTemplate relationship1;
RecordTemplate relationship2;
try {
Expand All @@ -83,75 +86,17 @@ public void testCheckSameUrnWithDifferentSourceUrn() {

List<RecordTemplate> relationships = Lists.newArrayList(relationship1, relationship2);
assertThrows(IllegalArgumentException.class,
() -> GraphUtils.checkSameUrn(
relationships,
BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE,
"source",
"destination")
() -> GraphUtils.checkSameSourceUrn(relationships, null)
);
// when urn is provided for relationship V1, the provided urn should be ignored
assertThrows(IllegalArgumentException.class,
() -> GraphUtils.checkSameUrn(
relationships,
BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE,
"source",
"destination",
new BarUrn(10))
);

// ToDo: add test cases for V2. Right now it check if a list of relationships have the same source urn.
}

@Test
public void testCheckSameUrnWithSameDestinationUrn() {
// Test cases for relationship V1
RelationshipFoo relationship1;
try {
relationship1 = mockRelationshipFoo(new FooUrn(1), new BarUrn(2));
} catch (URISyntaxException e) {
throw new RuntimeException(e);
}
List<RecordTemplate> relationships1 = Lists.newArrayList(relationship1, relationship1);

try {
GraphUtils.checkSameUrn(relationships1, BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_TO_DESTINATION, "source",
"destination");
} catch (IllegalArgumentException e) {
fail("Expected no IllegalArgumentException to be thrown, but got: " + e.getMessage());
}

// when urn is provided for relationship V1, the provided urn should be ignored
try {
GraphUtils.checkSameUrn(relationships1, BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_TO_DESTINATION, "source",
"destination", new BarUrn(10));
} catch (IllegalArgumentException e) {
fail("Expected no IllegalArgumentException to be thrown, but got: " + e.getMessage());
}

// Test cases for relationship V2
RelationshipV2Bar relationship2 = mockRelationshipV2Bar(new BarUrn(2));
List<RecordTemplate> relationships2 = Lists.newArrayList(relationship2, relationship2);

// throws exception if V2 relationships without source urn provided
assertThrows(IllegalArgumentException.class,
() -> GraphUtils.checkSameUrn(
relationships2,
BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_TO_DESTINATION,
"source",
"destination")
() -> GraphUtils.checkSameSourceUrn(relationships, new BarUrn(10))
);

try {
GraphUtils.checkSameUrn(relationships2, BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_TO_DESTINATION, "source",
"destination", new BarUrn(10));
} catch (IllegalArgumentException e) {
fail("Expected no IllegalArgumentException to be thrown, but got: " + e.getMessage());
}
}

@Test
public void testCheckSameUrnWithDifferentDestinationUrn() {
// Test cases for relationship V1
public void testCheckSameSourceUrnWithDifferentDestinationUrn() {
// Test cases for relationship V1 (already tested v2 case in testCheckSameSourceUrnWithSameSourceUrn)
RelationshipFoo relationship1;
RelationshipBar relationship2;
try {
Expand All @@ -163,41 +108,11 @@ public void testCheckSameUrnWithDifferentDestinationUrn() {

List<RecordTemplate> relationships1 = Lists.newArrayList(relationship1, relationship2);
assertThrows(IllegalArgumentException.class,
() -> GraphUtils.checkSameUrn(
relationships1,
BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_TO_DESTINATION,
"source",
"destination")
);

assertThrows(IllegalArgumentException.class,
() -> GraphUtils.checkSameUrn(
relationships1,
BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_TO_DESTINATION,
"source",
"destination",
new BarUrn(10))
);

// Test cases for relationship V2
RelationshipV2Bar relationship3 = mockRelationshipV2Bar(new BarUrn(3));
RelationshipV2Bar relationship4 = mockRelationshipV2Bar(new BarUrn(4));
List<RecordTemplate> relationships2 = Lists.newArrayList(relationship3, relationship4);
assertThrows(IllegalArgumentException.class,
() -> GraphUtils.checkSameUrn(
relationships2,
BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_TO_DESTINATION,
"source",
"destination")
() -> GraphUtils.checkSameSourceUrn(relationships1, null)
);

assertThrows(IllegalArgumentException.class,
() -> GraphUtils.checkSameUrn(
relationships2,
BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_TO_DESTINATION,
"source",
"destination",
new BarUrn(10))
() -> GraphUtils.checkSameSourceUrn(relationships1, new BarUrn(10))
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import com.linkedin.testing.SnapshotUnionWithEntitySnapshotOptionalFields;
import com.linkedin.testing.TyperefPizzaAspect;
import com.linkedin.testing.localrelationship.AspectFooBar;
import com.linkedin.testing.localrelationship.AspectFooBaz;
import com.linkedin.testing.localrelationship.AspectFooBarBaz;
import com.linkedin.testing.namingedgecase.EntityValueNamingEdgeCase;
import com.linkedin.testing.namingedgecase.InternalEntityAspectUnionNamingEdgeCase;
Expand Down Expand Up @@ -108,7 +109,7 @@ public void testGetValidAspectTypes() {
Set<Class<? extends RecordTemplate>> validTypes = ModelUtils.getValidAspectTypes(EntityAspectUnion.class);

assertEquals(validTypes,
ImmutableSet.of(AspectFoo.class, AspectBar.class, AspectFooBar.class, AspectFooBarBaz.class, AspectAttributes.class));
ImmutableSet.of(AspectFoo.class, AspectBar.class, AspectFooBar.class, AspectFooBaz.class, AspectFooBarBaz.class, AspectAttributes.class));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,8 @@ protected <ASPECT extends RecordTemplate> void insert(@Nonnull URN urn, @Nullabl
/**
* If the aspect is associated with at least one relationship, upsert the relationship into the corresponding local
* relationship table. Associated means that the aspect has a registered relationship build or it includes a relationship field.
* It will first try to find a registered relationship builder; if one doesn't exist or returns no relationship updates,
* try to find relationships from the aspect itself.
* @param urn Urn of the metadata update
* @param aspect Aspect of the metadata update
* @param aspectClass Aspect class of the metadata update
Expand All @@ -892,20 +894,24 @@ public <ASPECT extends RecordTemplate, RELATIONSHIP extends RecordTemplate> List
return Collections.emptyList();
}
List<LocalRelationshipUpdates> localRelationshipUpdates = Collections.emptyList();
if (_relationshipSource == RelationshipSource.ASPECT_METADATA) {
// Try to get relationships using relationship builders first. If there is not a relationship builder registered
// for the aspect class, try to get relationships from the aspect metadata instead. After most relationship models
// are using Model 2.0, switch the priority i.e. try to get the relationship from the aspect first before falling back
// on relationship builders.
// TODO: fix the gap where users can define new relationships in the aspect while still using graph builders to extract existing relationships
if (_localRelationshipBuilderRegistry != null && _localRelationshipBuilderRegistry.isRegistered(aspectClass)) {
localRelationshipUpdates = _localRelationshipBuilderRegistry.getLocalRelationshipBuilder(aspect).buildRelationships(urn, aspect);
// default all relationship updates to use REMOVE_ALL_EDGES_FROM_SOURCE
localRelationshipUpdates.forEach(update -> update.setRemovalOption(BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE));
}
// If no relationship updates were found using relationship builders, try to get them via the aspect.
if (localRelationshipUpdates.isEmpty()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would there be a case that an Aspect has one or multiple graph builders and also contains ASPECT_METADATA?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An aspect cannot have multiple graph builders due to the way that the builders are registered.

An aspect can have a graph builder and also contain ASPECT_METADATA. The behavior in this PR will prioritize finding the relationships from the graph builders first. Otherwise, try the aspect metadata. Otherwise, no relationship updates.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An aspect cannot have multiple graph builders due to the way that the builders are registered.

Can an aspect have multiple ASPECT_METADATA?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prioritize finding the relationships from the graph builders first.

what if a new ASPECT_METADATA added into an aspect with graph builder, will the ASPECT_METADATA be ignored?

Copy link
Contributor Author

@jsdonn jsdonn Nov 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can an aspect have multiple ASPECT_METADATA?

Aspects can have multiple relationship-typed fields (either array or singleton) yes.

what if a new ASPECT_METADATA added into an aspect with graph builder, will the ASPECT_METADATA be ignored?

If the graph builder returns non-empty relationships, then yes the relationships from ASPECT_METADATA will be ignored. If the graph builder returns empty list, then it will try to look in ASPECT_METADATA for relationships.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can be a limitation. You can include a comment to indicate this if we need more thought on its future behavior.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you think it's a limitation? the alternative is that we can prioritize getting the relationship from aspect metadata first and then falling back on relationship builders if needed. But that is an easy change we can make in the future

Copy link
Contributor

@yangyangv2 yangyangv2 Nov 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is possible that users add a new relationship model on this aspect and start filling relationship data with new relationships but want to keep existing graph builder working from existing attributes

Copy link
Contributor

@yangyangv2 yangyangv2 Nov 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the consideration to not support both is we don't want to have graph builder and the default relationship parser produce duplicate relationships. It is a solvable problem IMO, but want to think more if there're other side impact

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing this out. I added a TODO comment noting this gap.

Map<Class<?>, Set<RELATIONSHIP>> allRelationships = EBeanDAOUtils.extractRelationshipsFromAspect(aspect);
localRelationshipUpdates = allRelationships.entrySet().stream()
.filter(entry -> !entry.getValue().isEmpty()) // ensure at least 1 relationship in sublist to avoid index out of bounds
.map(entry -> new LocalRelationshipUpdates(
Arrays.asList(entry.getValue().toArray()), entry.getKey(), BaseGraphWriterDAO.RemovalOption.REMOVE_ALL_EDGES_FROM_SOURCE))
.collect(Collectors.toList());
} else if (_relationshipSource == RelationshipSource.RELATIONSHIP_BUILDERS) {
if (_localRelationshipBuilderRegistry != null && _localRelationshipBuilderRegistry.isRegistered(aspectClass)) {
localRelationshipUpdates = _localRelationshipBuilderRegistry.getLocalRelationshipBuilder(aspect).buildRelationships(urn, aspect);
}
} else {
throw new UnsupportedOperationException("Please ensure that the RelationshipSource enum is properly set using "
+ "setRelationshipSource method.");
}
_localRelationshipWriterDAO.processLocalRelationshipUpdates(urn, localRelationshipUpdates, isTestMode);
return localRelationshipUpdates;
Expand Down
Loading
Loading