Merge pull request #44 from Peyman-N/pipeline_namespace

Adapt the Python pipeline to the new namespace
openMetadataInitiative · Feb 6, 2025 · 0085b83 · 0085b83
2 parents f0ed26f + 5ff2fc1
commit 0085b83
Show file tree

Hide file tree

Showing 6 changed files with 92 additions and 34 deletions.
diff --git a/build.py b/build.py
@@ -47,6 +47,7 @@
             instances[version][instance_data["@type"]].append(instance_data)
 
 python_modules = defaultdict(list)
+
 for schema_version in schema_loader.get_schema_versions():
 
     # Step 3 - find all involved schemas for the current version
@@ -55,8 +56,10 @@
     # Step 4a - figure out which schemas are embedded and which are linked
     embedded = set()
     linked = set()
+    class_to_module_map = {}
     for schema_file_path in schemas_file_paths:
         emb, lnk = PythonBuilder(schema_file_path, schema_loader.schemas_sources).get_edges()
+        class_to_module_map=PythonBuilder(schema_file_path, schema_loader.schemas_sources).update_class_to_module_map(class_to_module_map)
         embedded.update(emb)
         linked.update(lnk)
     conflicts = linked.intersection(embedded)
@@ -76,7 +79,7 @@
             schema_loader.schemas_sources,
             instances=instances.get(schema_version, None),
             additional_methods=additional_methods,
-        ).build(embedded=embedded)
+        ).build(embedded=embedded,class_to_module_map=class_to_module_map)
 
         parts = module_path.split(".")
         parent_path = ".".join(parts[:-1])

diff --git a/pipeline/src/base.py b/pipeline/src/base.py
@@ -73,7 +73,10 @@ def to_jsonld(
 
         data = {"@type": self.type_}
         if with_context:
-            data["@context"] = {"@vocab": "https://openminds.ebrains.eu/vocab/"}
+            if self.type_.startswith("https://openminds.ebrains.eu/"):
+                data["@context"] = {"@vocab": "https://openminds.ebrains.eu/vocab/"}
+            else:
+                data["@context"] = {"@vocab": "https://openminds.om-i.org/props/"}
         if hasattr(self, "id") and self.id:
             data["@id"] = self.id
         for property in self.__class__.properties:

diff --git a/pipeline/src/collection.py b/pipeline/src/collection.py
@@ -85,6 +85,12 @@ def save(self, path, individual_files=False, include_empty_properties=False):
         # we first re-add all child nodes to the collection.
         # This is probably not the most elegant or fast way to do this, but it is simple and robust.
         for node in tuple(self.nodes.values()):
+
+            if node.type_.startswith("https://openminds.ebrains.eu/"):
+                data_context = {"@vocab": "https://openminds.ebrains.eu/vocab/"}
+            else:
+                data_context = {"@vocab": "https://openminds.om-i.org/props/"}
+
             for linked_node in node.links:
                 self._add_node(linked_node)
         # Now we can actually save the nodes
@@ -97,7 +103,7 @@ def save(self, path, individual_files=False, include_empty_properties=False):
                 if parent_dir:
                     os.makedirs(parent_dir, exist_ok=True)
             data = {
-                "@context": {"@vocab": "https://openminds.ebrains.eu/vocab/"},
+                "@context": data_context,
                 "@graph": [
                     node.to_jsonld(
                         embed_linked_nodes=False, include_empty_properties=include_empty_properties, with_context=False
@@ -154,9 +160,13 @@ def load(self, *paths):
             with open(path, "r") as fp:
                 data = json.load(fp)
             if "@graph" in data:
+                if data["@context"]["@vocab"].startswith("https://openminds.ebrains.eu/"):
+                    version="v3"
+                else:
+                    version="latest"
                 for item in data["@graph"]:
                     if "@type" in item:
-                        cls = lookup_type(item["@type"])
+                        cls = lookup_type(item["@type"],version=version)
                         node = cls.from_jsonld(item)
                     else:
                         # allow links to metadata instances outside this collection

diff --git a/pipeline/src/module_template.py.txt b/pipeline/src/module_template.py.txt
@@ -16,7 +16,7 @@ class {{ class_name }}({{ base_class }}):
     """
     type_ = "{{ openminds_type }}"
     context = {
-        "@vocab": "https://openminds.ebrains.eu/vocab/"
+        "@vocab": "{{ context_vocab }}"
     }
     schema_version = "{{ schema_version }}"
 
@@ -61,4 +61,4 @@ class {{ class_name }}({{ base_class }}):
         {{key}}={{value}},
     {%- endif %}
     {% endfor -%}
-){% endfor %}
+){% endfor %}
diff --git a/pipeline/tests/test_regressions.py b/pipeline/tests/test_regressions.py
@@ -12,7 +12,7 @@ def test_issue_0002():
 
     node = build_fake_node(omcore.Person)
     data = node.to_jsonld()
-    assert data["@type"] == "https://openminds.ebrains.eu/core/Person"
+    assert data["@type"] == "https://openminds.om-i.org/types/Person"
 
 
 def test_issue_0003():
@@ -38,21 +38,22 @@ def test_issue_0003():
     )
     # on export, a single item should be wrapped in a list, where the property expects an array
     expected = {
-        "@context": {"@vocab": "https://openminds.ebrains.eu/vocab/"},
-        "@type": "https://openminds.ebrains.eu/core/FileArchive",
+        "@context": {"@vocab": "https://openminds.om-i.org/props/"},
+        "@type": "https://openminds.om-i.org/types/FileArchive",
         "IRI": "http://example.com/archive.zip",
         "format": {
-            "@type": "https://openminds.ebrains.eu/core/ContentType",
+            "@type": "https://openminds.om-i.org/types/ContentType",
             "name": "application/zip",
-        },
+            },
         "sourceData": [
             {
-                "@type": "https://openminds.ebrains.eu/core/File",
+                "@type": "https://openminds.om-i.org/types/File",
                 "IRI": "http://example.com/some_file.txt",
                 "name": "some_file.txt",
             }
         ],
     }
+
     assert (
         node1.to_jsonld(include_empty_properties=False) == node2.to_jsonld(include_empty_properties=False) == expected
     )
@@ -89,20 +90,20 @@ def test_issue0007():
 
     actual = person.to_jsonld(include_empty_properties=False, embed_linked_nodes=False, with_context=True)
     expected = {
-        "@context": {"@vocab": "https://openminds.ebrains.eu/vocab/"},
+        "@context": {"@vocab": "https://openminds.om-i.org/props/"},
         "@id": "_:001",
-        "@type": "https://openminds.ebrains.eu/core/Person",
+        "@type": "https://openminds.om-i.org/types/Person",
         "familyName": "Professor",
         "givenName": "A",
         "affiliation": [
             {
-                "@type": "https://openminds.ebrains.eu/core/Affiliation",
+                "@type": "https://openminds.om-i.org/types/Affiliation",
                 "memberOf": {
                     "@id": "_:002"
                 },
             },
             {
-                "@type": "https://openminds.ebrains.eu/core/Affiliation",
+                "@type": "https://openminds.om-i.org/types/Affiliation",
                 "memberOf": {
                     "@id": "_:003"
                 },
@@ -119,20 +120,20 @@ def test_issue0007():
         saved_data = json.load(fp)
     os.remove("issue0007.jsonld")
     expected_saved_data = {
-        "@context": {"@vocab": "https://openminds.ebrains.eu/vocab/"},
+        "@context": {"@vocab": "https://openminds.om-i.org/props/"},
         "@graph": [
             {
                 "@id": "_:001",
-                "@type": "https://openminds.ebrains.eu/core/Person",
+                "@type": "https://openminds.om-i.org/types/Person",
                 "affiliation": [
                     {
-                        "@type": "https://openminds.ebrains.eu/core/Affiliation",
+                        "@type": "https://openminds.om-i.org/types/Affiliation",
                         "memberOf": {
                             "@id": "_:002"
                         },
                     },
                     {
-                        "@type": "https://openminds.ebrains.eu/core/Affiliation",
+                        "@type": "https://openminds.om-i.org/types/Affiliation",
                         "memberOf": {
                             "@id": "_:003"
                         },
@@ -143,12 +144,12 @@ def test_issue0007():
             },
             {
                 "@id": "_:002",
-                "@type": "https://openminds.ebrains.eu/core/Organization",
+                "@type": "https://openminds.om-i.org/types/Organization",
                 "fullName": "University of This Place",
             },
             {
                 "@id": "_:003",
-                "@type": "https://openminds.ebrains.eu/core/Organization",
+                "@type": "https://openminds.om-i.org/types/Organization",
                 "fullName": "University of That Place",
             },
         ],
@@ -170,12 +171,12 @@ def test_issue0008():
     )
     actual = person.to_jsonld(include_empty_properties=False, embed_linked_nodes=False, with_context=True)
     expected = {
-        "@context": {"@vocab": "https://openminds.ebrains.eu/vocab/"},
+        "@context": {"@vocab": "https://openminds.om-i.org/props/"},
         "@id": "_:002",
-        "@type": "https://openminds.ebrains.eu/core/Person",
+        "@type": "https://openminds.om-i.org/types/Person",
         "affiliation": [
             {
-                "@type": "https://openminds.ebrains.eu/core/Affiliation",
+                "@type": "https://openminds.om-i.org/types/Affiliation",
                 "endDate": "2023-09-30",
                 "memberOf": {
                     "@id": "_:001"

diff --git a/pipeline/translator.py b/pipeline/translator.py
@@ -25,7 +25,7 @@ def generate_python_name(json_name, allow_multiple=False):
     python_name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", json_name.strip())
     python_name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", python_name).lower()
     replacements = [
-        ("-", "_"), (".", "_"), ("+", "plus"), ("#", "sharp"), (",", "comma"), ("(", ""), (")", "")
+        ("-", "_"), (".", "_"),("'","_prime_"), ("+", "plus"), ("#", "sharp"), (",", "comma"), ("(", ""), (")", "")
     ]
     for before, after in replacements:
         python_name = python_name.replace(before, after)
@@ -57,14 +57,19 @@ class PythonBuilder(object):
 
     def __init__(self, schema_file_path: str, root_path: str, instances: Optional[dict] = None,
                  additional_methods: Optional[dict] = None):
-        self.template_name = "src/module_template.py.txt"
         self.env = Environment(
             loader=FileSystemLoader(os.path.dirname(os.path.realpath(__file__))), autoescape=select_autoescape()
         )
         _relative_path_without_extension = (
             schema_file_path[len(root_path) + 1 :].replace(".schema.omi.json", "").split("/")
         )
         self.version = _relative_path_without_extension[0]
+        self.template_name = "src/module_template.py.txt"
+        if self.version in ["v3.0" , "v2.0" , "v1.0"]:
+            self.context_vocab = "https://openminds.ebrains.eu/vocab/"
+        else:
+            self.context_vocab = "https://openminds.om-i.org/props/"
+
         self.relative_path_without_extension = [
             generate_python_name(part) for part in _relative_path_without_extension[1:]
         ]
@@ -83,7 +88,7 @@ def _version_module(self):
     def _target_file_without_extension(self) -> str:
         return os.path.join(self._version_module, "/".join(self.relative_path_without_extension))
 
-    def translate(self, embedded=None):
+    def translate(self, embedded=None, class_to_module_map=None):
         def get_type(property):
             type_map = {
                 "string": "str",
@@ -100,17 +105,23 @@ def get_type(property):
             if "_linkedTypes" in property:
                 types = []
                 for item in property["_linkedTypes"]:
-                    openminds_module, class_name = item.split("/")[-2:]
-                    openminds_module = generate_python_name(openminds_module)
+                    openminds_module_from_type, class_name = item.split("/")[-2:]
+                    if isinstance(class_to_module_map,dict) and (class_name in class_to_module_map):
+                        openminds_module = generate_python_name(class_to_module_map[class_name])
+                    else:
+                        openminds_module = generate_python_name(openminds_module_from_type)
                     types.append(f"openminds.{self._version_module}.{openminds_module}.{class_name}")
                 if len(types) == 1:
                     types = f'"{types[0]}"'
                 return types
             elif "_embeddedTypes" in property:
                 types = []
                 for item in property["_embeddedTypes"]:
-                    openminds_module, class_name = item.split("/")[-2:]
-                    openminds_module = generate_python_name(openminds_module)
+                    openminds_module_from_type, class_name = item.split("/")[-2:]
+                    if isinstance(class_to_module_map,dict) and (class_name in class_to_module_map):
+                        openminds_module = generate_python_name(class_to_module_map[class_name])
+                    else:
+                        openminds_module = generate_python_name(openminds_module_from_type)
                     types.append(f"openminds.{self._version_module}.{openminds_module}.{class_name}")
                 if len(types) == 1:
                     types = f'"{types[0]}"'
@@ -201,6 +212,7 @@ def filter_instance(instance):
             "class_name": class_name,
             "openminds_type": openminds_type,
             "schema_version": self.version,
+            "context_vocab": self.context_vocab,
             "properties": properties,
             "additional_methods": "",
             "instances": instances
@@ -233,11 +245,11 @@ def filter_instance(instance):
             if extra_imports:
                 self.context["preamble"] = "\n".join(sorted(extra_imports))
 
-    def build(self, embedded=None):
+    def build(self, embedded=None, class_to_module_map=None):
         target_file_path = os.path.join("target", "openminds", f"{self._target_file_without_extension()}.py")
         os.makedirs(os.path.dirname(target_file_path), exist_ok=True)
 
-        self.translate(embedded=embedded)
+        self.translate(embedded=embedded, class_to_module_map=class_to_module_map)
 
         with open(target_file_path, "w") as target_file:
             contents = self.env.get_template(self.template_name).render(self.context)
@@ -252,3 +264,32 @@ def get_edges(self):
             embedded.update(property.get("_embeddedTypes", []))
             linked.update(property.get("_linkedTypes", []))
         return embedded, linked
+
+    def update_class_to_module_map(self,class_to_module_map):
+        """
+        Record this schema's class name and module in `class_to_module_map`.
+
+        The class name is the last "/"-separated component of the `_type` value
+        in `_schema_payload`. If `_schema_payload` has a `_module` key (which
+        was introduced in version 4 of openMINDS), its value is used as the
+        module; otherwise the module is the second-to-last component of `_type`.
+        The dictionary is modified in place, and an existing entry for the same
+        class name is overwritten.
+
+        Args:
+            class_to_module_map (dict): A dictionary where keys are class names and values
+                                      are their corresponding modules.
+
+        Returns:
+            dict: The same dictionary object that was passed in, after the update.
+        """
+        schema_type=self._schema_payload["_type"]
+        class_name=schema_type.split("/")[-1]
+        if "_module" in self._schema_payload:
+            module=self._schema_payload["_module"]
+        else:
+            module=schema_type.split("/")[-2]
+
+        class_to_module_map[class_name]=module
+
+        return class_to_module_map