From 24a8b2f7688b0cc0c01a4a493affbce76cab3dea Mon Sep 17 00:00:00 2001 From: jwest Date: Wed, 25 Aug 2021 13:38:26 -0400 Subject: [PATCH] Feat: V0.67 Add SCOP2/ECOD polymer entity instance annotations --- .gitignore | 1 + HISTORY.txt | 2 + .../DictMethodEntityInstanceHelper.py | 125 ++++++++++++++++++ rcsb/utils/dictionary/__init__.py | 2 +- 4 files changed, 129 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index be2084b..3f30952 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ test-output/ CACHE/ LOGTOX .tox/ +.vscode/ # C extensions *.so diff --git a/HISTORY.txt b/HISTORY.txt index d0005f8..3ee3f78 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -56,4 +56,6 @@ 4-Aug-2021 - V0.64 Update dependencies and logging verbosity 13-Aug-2021 - V0.65 Remove possible redundancy in target cofactor data 13-Aug-2021 - V0.66 Remove possible redundancy in target reference assignments +25-Aug-2021 - V0.67 Add SCOP2/ECOD polymer entity instance annotations +# # \ No newline at end of file diff --git a/rcsb/utils/dictionary/DictMethodEntityInstanceHelper.py b/rcsb/utils/dictionary/DictMethodEntityInstanceHelper.py index cfd03da..a0a851d 100644 --- a/rcsb/utils/dictionary/DictMethodEntityInstanceHelper.py +++ b/rcsb/utils/dictionary/DictMethodEntityInstanceHelper.py @@ -610,6 +610,7 @@ def buildEntityInstanceFeatures(self, dataContainer, catName, **kwargs): # ii += 1 # + # --- SAbDab sabdabP = rP.getResource("SAbDabTargetFeatureProvider instance") if rP else None if sabdabP: @@ -1797,6 +1798,130 @@ def buildEntityInstanceAnnotations(self, dataContainer, catName, **kwargs): cObj.setValue(version, "assignment_version", ii) # ii += 1 + # JDW - Add SCOP2 family assignments + scopU = rP.getResource("Scop2Provider instance") if rP else None + if scopU: + version = scopU.getVersion() + for asymId, authAsymId in asymAuthIdD.items(): + if instTypeD[asymId] not in ["polymer", "branched"]: + continue + entityId = asymIdD[asymId] + # Family mappings + dL = 
scopU.getFamilyResidueRanges(entryId.upper(), authAsymId) + for (domId, familyId, _, _, _) in dL: + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(asymId, "asym_id", ii) + cObj.setValue(authAsymId, "auth_asym_id", ii) + cObj.setValue("SCOP2", "type", ii) + # + cObj.setValue(domId, "annotation_id", ii) + cObj.setValue(scopU.getName(familyId), "name", ii) + # + tL = [t if t is not None else "" for t in scopU.getNameLineage(familyId)] + cObj.setValue(";".join(tL), "annotation_lineage_name", ii) + idLinL = scopU.getIdLineage(familyId) + cObj.setValue(";".join([str(t) for t in idLinL]), "annotation_lineage_id", ii) + cObj.setValue(";".join([str(jj) for jj in range(1, len(idLinL) + 1)]), "annotation_lineage_depth", ii) + # + cObj.setValue("SCOP2", "provenance_source", ii) + cObj.setValue(version, "assignment_version", ii) + # + ii += 1 + # ------------ + # Add SCOP2 superfamily assignments + for asymId, authAsymId in asymAuthIdD.items(): + if instTypeD[asymId] not in ["polymer", "branched"]: + continue + entityId = asymIdD[asymId] + # Superfamily mappings + dL = scopU.getSuperFamilyResidueRanges(entryId.lower(), authAsymId) + for (domId, superfamilyId, _, _, _) in dL: + + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(asymId, "asym_id", ii) + cObj.setValue(authAsymId, "auth_asym_id", ii) + cObj.setValue("SCOP2", "type", ii) + # + cObj.setValue(domId, "annotation_id", ii) + cObj.setValue(scopU.getName(superfamilyId), "name", ii) + # + tL = [t if t is not None else "" for t in scopU.getNameLineage(superfamilyId)] + cObj.setValue(";".join(tL), "annotation_lineage_name", ii) + idLinL = scopU.getIdLineage(superfamilyId) + cObj.setValue(";".join([str(t) for t in idLinL]), "annotation_lineage_id", ii) + cObj.setValue(";".join([str(jj) for jj in range(1, len(idLinL) + 1)]), "annotation_lineage_depth", ii) + 
# + cObj.setValue("SCOP2", "provenance_source", ii) + cObj.setValue(version, "assignment_version", ii) + # + ii += 1 + # ---- + # Add SCOP2B superfamily assignments + for asymId, authAsymId in asymAuthIdD.items(): + if instTypeD[asymId] not in ["polymer", "branched"]: + continue + entityId = asymIdD[asymId] + # Superfamily mappings + dL = scopU.getSuperFamilyResidueRanges2B(entryId.lower(), authAsymId) + for (domId, superfamilyId, _, _, _) in dL: + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(asymId, "asym_id", ii) + cObj.setValue(authAsymId, "auth_asym_id", ii) + cObj.setValue("SCOP2", "type", ii) + # + cObj.setValue(domId, "annotation_id", ii) + cObj.setValue(scopU.getName(superfamilyId), "name", ii) + # + tL = [t if t is not None else "" for t in scopU.getNameLineage(superfamilyId)] + cObj.setValue(";".join(tL), "annotation_lineage_name", ii) + idLinL = scopU.getIdLineage(superfamilyId) + cObj.setValue(";".join([str(t) for t in idLinL]), "annotation_lineage_id", ii) + cObj.setValue(";".join([str(jj) for jj in range(1, len(idLinL) + 1)]), "annotation_lineage_depth", ii) + # + cObj.setValue("SCOP2B", "provenance_source", ii) + cObj.setValue(version, "assignment_version", ii) + # + ii += 1 + # ------------ + # ECOD assignments - + ecodU = rP.getResource("EcodProvider instance") if rP else None + if ecodU: + version = ecodU.getVersion() + for asymId, authAsymId in asymAuthIdD.items(): + if instTypeD[asymId] not in ["polymer", "branched"]: + continue + entityId = asymIdD[asymId] + # Family mappings + dL = ecodU.getFamilyResidueRanges(entryId.lower(), authAsymId) + for (domId, familyId, _, _, _) in dL: + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(asymId, "asym_id", ii) + cObj.setValue(authAsymId, "auth_asym_id", ii) + cObj.setValue("ECOD", "type", ii) + # + fName = 
ecodU.getName(familyId)[3:] + cObj.setValue(domId, "annotation_id", ii) + cObj.setValue(fName, "name", ii) + # + + tL = [t if t is not None else "" for t in ecodU.getNameLineage(familyId)] + cObj.setValue(";".join(tL), "annotation_lineage_name", ii) + idLinL = ecodU.getIdLineage(familyId) + cObj.setValue(";".join([str(t) for t in idLinL]), "annotation_lineage_id", ii) + cObj.setValue(";".join([str(jj) for jj in range(1, len(idLinL) + 1)]), "annotation_lineage_depth", ii) + # + cObj.setValue("ECOD", "provenance_source", ii) + cObj.setValue(version, "assignment_version", ii) + # + ii += 1 # ------------ # Add covalent attachment property npbD = self.__commonU.getBoundNonpolymersByInstance(dataContainer) diff --git a/rcsb/utils/dictionary/__init__.py b/rcsb/utils/dictionary/__init__.py index feae550..1d76e5a 100644 --- a/rcsb/utils/dictionary/__init__.py +++ b/rcsb/utils/dictionary/__init__.py @@ -2,4 +2,4 @@ __author__ = "John Westbrook" __email__ = "john.westbrook@rcsb.org" __license__ = "Apache 2.0" -__version__ = "0.66" +__version__ = "0.67"