Skip to content

Commit

Permalink
cleanup unrelated classes when rooting (GSI-720)
Browse files Browse the repository at this point in the history
... from schemapack.
  • Loading branch information
KerstenBreuer committed Apr 25, 2024
1 parent f476198 commit c67eee0
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 5 deletions.
11 changes: 11 additions & 0 deletions examples/schemapack/valid/unrelated_classes.schemapack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
schemapack: 0.3.0
description: Two unrelated classes
classes:
SomeClass:
id:
propertyName: alias
content: ../../content_schemas/AnyObject.schema.json
AnotherClass:
id:
propertyName: alias
content: ../../content_schemas/AnyObject.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
schemapack: 0.3.0
description: Two unrelated classes # but rooted to one class so that only that class
# is kept.
classes:
SomeClass:
id:
propertyName: alias
content: ../../content_schemas/AnyObject.schema.json
rootClass: SomeClass
79 changes: 74 additions & 5 deletions src/schemapack/_internals/isolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from schemapack.spec.schemapack import SchemaPack


def identify_dependencies( # noqa: C901, PLR0912
def identify_resource_dependencies( # noqa: C901, PLR0912
*,
datapack: DataPack,
class_name: ClassName,
Expand Down Expand Up @@ -125,7 +125,7 @@ def identify_dependencies( # noqa: C901, PLR0912
dependencies_by_class[target_class_name].add(target_id)

# Recursively add dependencies of this target resource:
nested_dependencies = identify_dependencies(
nested_dependencies = identify_resource_dependencies(
datapack=datapack,
class_name=target_class_name,
resource_id=target_id,
Expand Down Expand Up @@ -206,7 +206,7 @@ def isolate_resource(
If it became apparent that the datapack was not already validated against
the schemapack.
"""
dependency_map = identify_dependencies(
dependency_map = identify_resource_dependencies(
datapack=datapack,
class_name=class_name,
resource_id=resource_id,
Expand All @@ -218,6 +218,70 @@ def isolate_resource(
return rooted_datapack


def identify_class_dependencies(
    *,
    class_name: ClassName,
    schemapack: SchemaPack,
    _class_blacklist: Optional[set[ClassName]] = None,
) -> set[ClassName]:
    """Identify all dependencies (recursively) for a given class in the given schemapack.

    A dependency is any class that can be reached from the given class by following
    the `targetClass` of relations, directly or transitively. Circular relations are
    handled: every class is visited at most once. If the given class can be reached
    from itself through a cycle (including a direct self-reference), it is part of
    the returned set.

    Args:
        class_name:
            The class for which to identify dependencies.
        schemapack:
            The schemapack used for looking up the classes of relations.
        _class_blacklist:
            A set of class names that are neither reported as dependencies nor
            traversed. Kept for backwards compatibility and intended for internal
            use only. The provided set is not modified.

    Returns:
        The names of all classes the given class depends on.

    Raises:
        schemapack.Exceptions.ClassNotFoundError:
            If the class_name, or a class targeted by a visited relation, does not
            exist in the schemapack.
    """
    dependencies: set[ClassName] = set()

    # Explicit work list instead of recursion: a single shared `dependencies` set
    # acts as the visited marker, which guarantees termination for cycles of any
    # length and avoids re-traversing classes that are reachable via several paths.
    pending: list[ClassName] = [class_name]

    while pending:
        current_class_name = pending.pop()

        class_definition = schemapack.classes.get(current_class_name)
        if class_definition is None:
            raise ClassNotFoundError(
                class_name=current_class_name, spec_type=SpecType.SCHEMAPACK
            )

        for relation in class_definition.relations.values():
            target_class_name = relation.targetClass

            if target_class_name in dependencies:
                # Already discovered (and scheduled) earlier.
                continue
            if _class_blacklist and target_class_name in _class_blacklist:
                continue

            dependencies.add(target_class_name)
            pending.append(target_class_name)

    return dependencies


def downscope_schemapack(
    *, schemapack: SchemaPack, classes_to_keep: set[ClassName]
) -> SchemaPack:
    """Downscope a schemapack to only contain the given classes.

    The kept classes appear in the same order as in the provided schemapack, so the
    result does not depend on the iteration order of the `classes_to_keep` set.

    Args:
        schemapack:
            The schemapack to downscope. It is not modified.
        classes_to_keep:
            The names of the classes that shall remain in the returned schemapack.

    Returns:
        A copy of the schemapack that only contains the classes to keep.

    Raises:
        schemapack.Exceptions.ClassNotFoundError:
            If one of the classes in classes_to_keep does not exist in the schemapack.
    """
    missing_classes = [
        class_name
        for class_name in classes_to_keep
        if class_name not in schemapack.classes
    ]
    if missing_classes:
        # Report the smallest name so the error is reproducible across runs.
        raise ClassNotFoundError(
            class_name=min(missing_classes), spec_type=SpecType.SCHEMAPACK
        )

    # Iterate over the schemapack's classes rather than the unordered set so that
    # the original class order is preserved in the downscoped schemapack.
    downscoped_classes = {
        class_name: class_definition
        for class_name, class_definition in schemapack.classes.items()
        if class_name in classes_to_keep
    }

    return schemapack.model_copy(update={"classes": downscoped_classes})


def isolate_class(*, class_name: ClassName, schemapack: SchemaPack) -> SchemaPack:
"""Return a copy of the provided schemapack that is rooted to the specified class.
Expand All @@ -226,8 +290,13 @@ def isolate_class(*, class_name: ClassName, schemapack: SchemaPack) -> SchemaPac
schemapack.Exceptions.ClassNotFoundError:
If the class_name does not exist in the schemapack or datapack.
"""
if class_name not in schemapack.classes:
raise ClassNotFoundError(class_name=class_name, spec_type=SpecType.SCHEMAPACK)
dependencies = identify_class_dependencies(
class_name=class_name, schemapack=schemapack
)
dependencies.add(class_name)
schemapack = downscope_schemapack(
schemapack=schemapack, classes_to_keep=dependencies
)

return schemapack.model_copy(update={"rootClass": class_name})

Expand Down
11 changes: 11 additions & 0 deletions tests/test_isolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,14 @@ def test_isolate_class_non_exisiting_class():
schemapack=schemapack,
class_name="NonExistingClass",
)


def test_isolate_class_downscoping():
    """Check that rooting a schemapack to a class drops all unrelated classes."""
    unrooted = load_schemapack(VALID_SCHEMAPACK_PATHS["unrelated_classes"])
    expected = load_schemapack(VALID_SCHEMAPACK_PATHS["unrelated_classes_rooted"])

    # Rooting to "SomeClass" must leave only that class in the schemapack.
    observed = isolate_class(class_name="SomeClass", schemapack=unrooted)

    assert observed == expected

0 comments on commit c67eee0

Please sign in to comment.