From f216c88cd68def4369b52da8820d54dcb7fde534 Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Tue, 28 May 2024 16:39:51 -0400 Subject: [PATCH 01/13] Initial functions for the MinDistanceCationAnionFitness class in the MC docking --- VOID/fitness/__init__.py | 14 ++++++- VOID/fitness/threshold.py | 80 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 2 deletions(-) diff --git a/VOID/fitness/__init__.py b/VOID/fitness/__init__.py index 29d18cf..b9f2645 100644 --- a/VOID/fitness/__init__.py +++ b/VOID/fitness/__init__.py @@ -1,10 +1,20 @@ from .base import Fitness -from .threshold import MinDistanceFitness, MeanDistanceFitness, SumInvDistanceFitness -from .target import MinDistanceGaussianTarget, MeanDistanceGaussianTarget, MaxDistanceGaussianTarget +from .threshold import ( + MinDistanceFitness, + MeanDistanceFitness, + SumInvDistanceFitness, + MinDistanceCationAnionFitness, +) +from .target import ( + MinDistanceGaussianTarget, + MeanDistanceGaussianTarget, + MaxDistanceGaussianTarget, +) from .union import MultipleFitness __all__ = [ MinDistanceFitness, + MinDistanceCationAnionFitness, MeanDistanceFitness, SumInvDistanceFitness, MinDistanceGaussianTarget, diff --git a/VOID/fitness/threshold.py b/VOID/fitness/threshold.py index 5d85f23..cecc4ba 100644 --- a/VOID/fitness/threshold.py +++ b/VOID/fitness/threshold.py @@ -2,6 +2,8 @@ from .base import Fitness +from pymatgen.core.sites import Site + THRESHOLD = 1.5 DEFAULT_STRUCTURE = "complex" @@ -60,6 +62,62 @@ def get_distances(self, complex): else: raise ValueError("structure type not supported") + def get_zeolite_oxygens(self, pose): + """Collect all the O atoms in the structure.""" + return [index for index, site in enumerate(pose) if site.species_string == "O"] + + def find_cation_index(self, pose, distance_matrices): + """Identify the cation position in the guest.""" + for index, site in enumerate(pose): + element = site.species_string + if element == "C": + bonds = sum(1 for dist 
in distance_matrices[index] if 0 < dist < 1.6) + if bonds == 3: + return index + return None + + def find_acid_sites(self, pose, distance_matrices, zeolite_oxygens): + """Identify the acid sites in the zeolite.""" + acid_oxygens = [] + acid_al_indexes = [] + + for index, site in enumerate(pose): + if site.species_string == "Al": + candidate_oxygens = [ + dist_index + for dist_index, dist in enumerate(distance_matrices[index]) + if 0 < dist < 1.8 and dist_index in zeolite_oxygens + ] + if len(candidate_oxygens) == 4 and all( + all( + not (bond_dist < 1.15 and bond_dist != 0.0) + for bond_dist in distance_matrices[ox_index] + ) + for ox_index in candidate_oxygens + ): # 1.15 accounts for O-H bond + acid_oxygens.append(candidate_oxygens) + acid_al_indexes.append(index) + + return acid_oxygens, acid_al_indexes + + def get_catan_distances(self, acid_oxygens, cation_index, distance_matrices): + """Check the cation-anion distances for the different acid sites in the zeolite.""" + distances_catan = [] + for acid_al in acid_oxygens: + distances_cation_anion = [ + distance_matrices[cation_index][ox_index] for ox_index in acid_al + ] + print( + "Distances between cation and acid oxygens are:", distances_cation_anion + ) + distances_catan.append(distances_cation_anion) + # if any(dist < 2.0 for dist in distances_cation_anion): + # print("Optimal distance found! 
Aborting the run") + # return True + # return False + + return distances_catan + def normalize(self, value): if self.step: return 0 if value > 0 else -np.inf @@ -74,6 +132,28 @@ def __call__(self, complex): return self.normalize(self.get_distances(complex).min() - self.threshold) +class MinDistanceCationAnionFitness(ThresholdFitness): + PARSER_NAME = "min_catan_distance" + HELP = "Complexes have positive score if the minimum distance between host anion and guest cation is above the given threshold" + + def __call__(self, complex): + # print(complex.pose) + pose = complex.pose + distance_matrices = complex.pose.distance_matrix + zeolite_oxygens = self.get_zeolite_oxygens(pose) + cation_index = self.find_cation_index( + pose, + distance_matrices, + ) + acid_sites, acid_al_indexes = self.find_acid_sites( + pose, distance_matrices, zeolite_oxygens + ) + return self.normalize( + min(self.get_catan_distances(acid_sites, cation_index, distance_matrices)) + - self.threshold + ) + + class MeanDistanceFitness(ThresholdFitness): PARSER_NAME = "mean_distance" HELP = "Complexes have positive score if the mean distance between host and guest is above the given threshold" From 452ca2eae9bf9e902984921d44148c46182799b4 Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Wed, 29 May 2024 12:03:00 -0400 Subject: [PATCH 02/13] cation/anion identifier functions work through atomtypes dict, get_catan_distances now combines MinDistanceFitness + MinDistanceCationAnionFitness, works --- VOID/fitness/threshold.py | 231 ++++++++++++++++++++++++++++++-------- 1 file changed, 183 insertions(+), 48 deletions(-) diff --git a/VOID/fitness/threshold.py b/VOID/fitness/threshold.py index cecc4ba..6a2d541 100644 --- a/VOID/fitness/threshold.py +++ b/VOID/fitness/threshold.py @@ -62,61 +62,193 @@ def get_distances(self, complex): else: raise ValueError("structure type not supported") - def get_zeolite_oxygens(self, pose): - """Collect all the O atoms in the structure.""" - return [index for index, 
site in enumerate(pose) if site.species_string == "O"] + def get_atomtypes_indexes(self, pose): + """Collect all the atomtypes indexes in the structure.""" + atomtypes_indexes = {} - def find_cation_index(self, pose, distance_matrices): - """Identify the cation position in the guest.""" for index, site in enumerate(pose): - element = site.species_string - if element == "C": - bonds = sum(1 for dist in distance_matrices[index] if 0 < dist < 1.6) - if bonds == 3: - return index - return None - - def find_acid_sites(self, pose, distance_matrices, zeolite_oxygens): + atom_type = site.species_string + feature = site.label # labels the atomtype as host or guest + if atom_type not in atomtypes_indexes: + atomtypes_indexes[atom_type] = [] + atomtypes_indexes[atom_type].append((index, feature)) + + return atomtypes_indexes + + def find_cation_index(self, distance_matrices, atomtypes_indexes): + """Identify the cation position in the guest.""" + + carbon_indexes = [ + idx for idx, lbl in atomtypes_indexes.get("C", []) if lbl == "guest" + ] + nitrogen_indexes = [ + idx for idx, lbl in atomtypes_indexes.get("N", []) if lbl == "guest" + ] + oxygen_indexes = [ + idx for idx, lbl in atomtypes_indexes.get("O", []) if lbl == "guest" + ] + hydrogen_indexes = [ + idx for idx, lbl in atomtypes_indexes.get("H", []) if lbl == "guest" + ] + for carbon_index in carbon_indexes: + bonds = 0 + for i, dist in enumerate(distance_matrices[carbon_index]): + if ( + i in carbon_indexes + or i in hydrogen_indexes + or i in nitrogen_indexes + or i in oxygen_indexes + ): + if 0 < dist < 1.15 and i in hydrogen_indexes: # exp C-H dist 1.09 A + bonds += 1 + elif ( + 1.5 < dist < 1.6 and i in carbon_indexes + ): # exp C-C dist 1.55 A + bonds += 1 + elif ( + 1.29 < dist < 1.39 and i in carbon_indexes + ): # exp C=C dist 1.34 A + bonds += 2 + elif ( + 1.15 < dist < 1.25 and i in carbon_indexes + ): # exp C≡C dist 1.20 A + bonds += 3 + elif ( + 1.38 < dist < 1.48 and i in nitrogen_indexes + ): # exp C-N 
dist 1.43 A + bonds += 1 + elif ( + 1.33 < dist < 1.43 and i in nitrogen_indexes + ): # exp C=N dist 1.38 A + bonds += 2 + elif ( + 1.11 < dist < 1.21 and i in nitrogen_indexes + ): # exp C≡N dist 1.16 A + bonds += 3 + elif ( + 1.38 < dist < 1.48 and i in oxygen_indexes + ): # exp C-O dist 1.43 A + bonds += 1 + elif ( + 1.18 < dist < 1.28 and i in oxygen_indexes + ): # exp C=O dist 1.28 A + bonds += 2 + elif ( + 1.08 < dist < 1.18 and i in oxygen_indexes + ): # exp C≡O dist 1.18 A + bonds += 3 + + if bonds == 3: + return carbon_index + + for nitrogen_index in nitrogen_indexes: + bonds = 0 + for i, dist in enumerate(distance_matrices[nitrogen_index]): + if ( + i in carbon_indexes + or i in hydrogen_indexes + or i in nitrogen_indexes + or i in oxygen_indexes + ): + if 0 < dist < 1.05 and i in hydrogen_indexes: # exp N-H dist 1.00 A + bonds += 1 + elif ( + 1.42 < dist < 1.52 and i in carbon_indexes + ): # exp N-N dist 1.47 A + bonds += 1 + elif ( + 1.19 < dist < 1.29 and i in carbon_indexes + ): # exp N=N dist 1.24 A + bonds += 2 + elif ( + 1.05 < dist < 1.15 and i in carbon_indexes + ): # exp N≡N dist 1.10 A + bonds += 3 + elif ( + 1.38 < dist < 1.48 and i in nitrogen_indexes + ): # exp C-N dist 1.43 A + bonds += 1 + elif ( + 1.33 < dist < 1.43 and i in nitrogen_indexes + ): # exp C=N dist 1.38 A + bonds += 2 + elif ( + 1.11 < dist < 1.21 and i in nitrogen_indexes + ): # exp C≡N dist 1.16 A + bonds += 3 + elif ( + 1.39 < dist < 1.49 and i in oxygen_indexes + ): # exp N-O dist 1.44 A + bonds += 1 + elif ( + 1.15 < dist < 1.25 and i in oxygen_indexes + ): # exp C=O dist 1.20 A + bonds += 2 + + if bonds == 4: + return nitrogen_index + + # If no cation index is found, raise an error with a custom message + raise ValueError( + "Cation index could not be found. Please check the molecule you are docking." 
+ ) + + def find_acid_sites(self, distance_matrices, atomtypes_indexes): """Identify the acid sites in the zeolite.""" acid_oxygens = [] acid_al_indexes = [] - for index, site in enumerate(pose): - if site.species_string == "Al": - candidate_oxygens = [ - dist_index - for dist_index, dist in enumerate(distance_matrices[index]) - if 0 < dist < 1.8 and dist_index in zeolite_oxygens - ] - if len(candidate_oxygens) == 4 and all( - all( - not (bond_dist < 1.15 and bond_dist != 0.0) - for bond_dist in distance_matrices[ox_index] - ) - for ox_index in candidate_oxygens - ): # 1.15 accounts for O-H bond - acid_oxygens.append(candidate_oxygens) - acid_al_indexes.append(index) + host_oxygens = [ + idx for idx, lbl in atomtypes_indexes.get("O", []) if lbl == "host" + ] + host_aluminum = [ + idx for idx, lbl in atomtypes_indexes.get("Al", []) if lbl == "host" + ] + ## Room to add more metals if needed + + for al_index in host_aluminum: + candidate_oxygens = [ + dist_index + for dist_index, dist in enumerate(distance_matrices[al_index]) + if 0 < dist < 1.8 and dist_index in host_oxygens + ] + if len(candidate_oxygens) == 4 and all( + all( + not (bond_dist < 1.10 and bond_dist != 0.0) + for bond_dist in distance_matrices[ox_index] + ) + for ox_index in candidate_oxygens + ): # 1.10 accounts for O-H bond + acid_oxygens.append(candidate_oxygens) + acid_al_indexes.append(al_index) return acid_oxygens, acid_al_indexes - def get_catan_distances(self, acid_oxygens, cation_index, distance_matrices): + def get_catan_distances( + self, acid_oxygens, cation_index, distance_matrices, complex + ): """Check the cation-anion distances for the different acid sites in the zeolite.""" distances_catan = [] for acid_al in acid_oxygens: distances_cation_anion = [ distance_matrices[cation_index][ox_index] for ox_index in acid_al ] + + distances_catan.append(distances_cation_anion) + + if ( + any(dist < 2.0 for sublist in distances_catan for dist in sublist) + and 
self.normalize(self.get_distances(complex).min() - self.threshold) > 0 + ): + print("Optimal distance found! Aborting the run") print( - "Distances between cation and acid oxygens are:", distances_cation_anion + "Distances between cation and acid oxygens are:", + distances_catan, ) - distances_catan.append(distances_cation_anion) - # if any(dist < 2.0 for dist in distances_cation_anion): - # print("Optimal distance found! Aborting the run") - # return True - # return False + return True, distances_catan - return distances_catan + else: + return False, distances_catan def normalize(self, value): if self.step: @@ -134,25 +266,28 @@ def __call__(self, complex): class MinDistanceCationAnionFitness(ThresholdFitness): PARSER_NAME = "min_catan_distance" - HELP = "Complexes have positive score if the minimum distance between host anion and guest cation is above the given threshold" + HELP = "Complexes have positive score if the minimum distance between host anion and guest cation is below the given threshold plus Complexes have positive score if the minimum distance between host and guest is above the given threshold" def __call__(self, complex): - # print(complex.pose) pose = complex.pose distance_matrices = complex.pose.distance_matrix - zeolite_oxygens = self.get_zeolite_oxygens(pose) - cation_index = self.find_cation_index( - pose, - distance_matrices, - ) + atomtypes_indexes = self.get_atomtypes_indexes(pose) + + cation_index = self.find_cation_index(distance_matrices, atomtypes_indexes) acid_sites, acid_al_indexes = self.find_acid_sites( - pose, distance_matrices, zeolite_oxygens + distance_matrices, atomtypes_indexes ) - return self.normalize( - min(self.get_catan_distances(acid_sites, cation_index, distance_matrices)) - - self.threshold + + converged, distances = self.get_catan_distances( + acid_sites, cation_index, distance_matrices, complex ) + if converged: + return 1 + + else: + return -np.inf + class MeanDistanceFitness(ThresholdFitness): PARSER_NAME = 
"mean_distance" From 0cbd24a18b09858ea69b2049ce25056d4d9d1202 Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Wed, 29 May 2024 14:37:42 -0400 Subject: [PATCH 03/13] added comments and docstrings to all functions in threshold.py --- VOID/fitness/threshold.py | 73 +++++++++++++++++++++++++++++++++++---- 1 file changed, 67 insertions(+), 6 deletions(-) diff --git a/VOID/fitness/threshold.py b/VOID/fitness/threshold.py index 6a2d541..9d33a00 100644 --- a/VOID/fitness/threshold.py +++ b/VOID/fitness/threshold.py @@ -63,7 +63,14 @@ def get_distances(self, complex): raise ValueError("structure type not supported") def get_atomtypes_indexes(self, pose): - """Collect all the atomtypes indexes in the structure.""" + """Collect all the atomtypes indexes in the structure + + Args: + pose (structure): Structure object containing atomtype, xyz, and host/guest features + + Returns: + atomtypes_indexes (dict): dictionary comprising information about each atomtype, its indexes and the "host"/"guest" nature + """ atomtypes_indexes = {} for index, site in enumerate(pose): @@ -76,8 +83,20 @@ def get_atomtypes_indexes(self, pose): return atomtypes_indexes def find_cation_index(self, distance_matrices, atomtypes_indexes): - """Identify the cation position in the guest.""" + """Identify the cation position in the guest molecule. This function is intended for monocationic species. + + Args: + distance_matrices (list): List of distance matrix lists for each atom with respect to all other atoms. + atomtypes_indexes (dict): Dictionary comprising information about each atomtype, its indexes, and its "host"/"guest" nature. + Raises: + ValueError: If the code cannot find the cation on the guest molecule. + + Returns: + int: Index corresponding to the carbon (C) or nitrogen (N) atom that hosts the positive charge on the molecule. 
+ """ + + # First retrieves the indexes for each atomtype, more can be added if needed carbon_indexes = [ idx for idx, lbl in atomtypes_indexes.get("C", []) if lbl == "guest" ] @@ -90,8 +109,11 @@ def find_cation_index(self, distance_matrices, atomtypes_indexes): hydrogen_indexes = [ idx for idx, lbl in atomtypes_indexes.get("H", []) if lbl == "guest" ] + + # Checks the numbers of bonds for each C atom, takes into account double and triple bonds for carbon_index in carbon_indexes: bonds = 0 + # Check all the dsitances and bond types for C-H, C-C, C-N, C-O for i, dist in enumerate(distance_matrices[carbon_index]): if ( i in carbon_indexes @@ -138,11 +160,14 @@ def find_cation_index(self, distance_matrices, atomtypes_indexes): ): # exp C≡O dist 1.18 A bonds += 3 + # If a C atom has only 3 bonds, identifies this atom as the positive charge holder if bonds == 3: return carbon_index + # Checks the numbers of bonds for each N atom, takes into account double and triple bonds for nitrogen_index in nitrogen_indexes: bonds = 0 + # Checks all distances and bond types for N-H, N-C, N-N and N-O for i, dist in enumerate(distance_matrices[nitrogen_index]): if ( i in carbon_indexes @@ -194,7 +219,17 @@ def find_cation_index(self, distance_matrices, atomtypes_indexes): ) def find_acid_sites(self, distance_matrices, atomtypes_indexes): - """Identify the acid sites in the zeolite.""" + """Identify the acid sites in the host structure. + + Args: + distance_matrices (list): List of distance matrix lists for each atom with respect to all other atoms. + atomtypes_indexes (dict): Dictionary containing information about each atomtype, its indexes, and its "host"/"guest" nature. + + Returns: + list: List of lists with 4 oxygen atom indexes attached to each acid site. These oxygen atoms account for the negative charge to be compensated by the cation. + list: List of aluminum (Al) indexes, whose bonded oxygens are not compensated by any proton. (Not used throughout the code at present.) 
+ """ + acid_oxygens = [] acid_al_indexes = [] @@ -204,14 +239,18 @@ def find_acid_sites(self, distance_matrices, atomtypes_indexes): host_aluminum = [ idx for idx, lbl in atomtypes_indexes.get("Al", []) if lbl == "host" ] - ## Room to add more metals if needed + ## Room to add more metals if needed for docking to metal slides or so + # Checks every Al atom present on the host structure for al_index in host_aluminum: + # gets the 4 closest atoms to it (hence the bonded ones), Al-O dist ~1.79 candidate_oxygens = [ dist_index for dist_index, dist in enumerate(distance_matrices[al_index]) - if 0 < dist < 1.8 and dist_index in host_oxygens + if 0 < dist < 1.9 and dist_index in host_oxygens ] + # if there are 4 bonds and any of the 4 oxygens has an H bonded to it + # Then the 4 oxygens are considered acid sites and can form a bond with the cation if len(candidate_oxygens) == 4 and all( all( not (bond_dist < 1.10 and bond_dist != 0.0) @@ -227,7 +266,19 @@ def find_acid_sites(self, distance_matrices, atomtypes_indexes): def get_catan_distances( self, acid_oxygens, cation_index, distance_matrices, complex ): - """Check the cation-anion distances for the different acid sites in the zeolite.""" + """Check the distances between the cation and anion for different acid sites in the zeolite. + + Args: + acid_oxygens (list): List of lists with 4 oxygen atom indexes attached to each acid site; these oxygen atoms account for the negative charge to be compensated by the cation. + cation_index (int): Index corresponding to the carbon (C) or nitrogen (N) atom that hosts the positive charge on the molecule. + distance_matrices (list): List of distance matrix lists for each atom with respect to all other atoms. + complex (structure): Structure object representing the host-guest complex. + + Returns: + bool: True if both requirements are met; False if either of the requirements isn't met. 
+ list: List of distance lists between the cation index and the 4 acid oxygens corresponding to each acid Al. + """ + distances_catan = [] for acid_al in acid_oxygens: distances_cation_anion = [ @@ -269,6 +320,16 @@ class MinDistanceCationAnionFitness(ThresholdFitness): HELP = "Complexes have positive score if the minimum distance between host anion and guest cation is below the given threshold plus Complexes have positive score if the minimum distance between host and guest is above the given threshold" def __call__(self, complex): + """Docks a guest cation into a host with anionic spots while ensuring a minimal distance between them. + + Args: + complex (structure): The host-guest complex object containing the host and guest molecules. + + Returns: + int: 1 if both MinDistanceFitness and MinDistanceCationAnionFitness criteria are met. + float: Negative infinity (-np.inf) if either MinDistanceFitness or MinDistanceCationAnionFitness criteria are not met. + """ + pose = complex.pose distance_matrices = complex.pose.distance_matrix atomtypes_indexes = self.get_atomtypes_indexes(pose) From f1c1392f276a1a8639befa321185363f806f3f59 Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Wed, 29 May 2024 17:35:13 -0400 Subject: [PATCH 04/13] threshold for cation-anion distance added as an argument threshold_catan in def __init__ for ThresholdFitness class --- VOID/fitness/threshold.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/VOID/fitness/threshold.py b/VOID/fitness/threshold.py index 9d33a00..f68b5eb 100644 --- a/VOID/fitness/threshold.py +++ b/VOID/fitness/threshold.py @@ -6,13 +6,21 @@ THRESHOLD = 1.5 +THRESHOLD_CATAN = 2.0 DEFAULT_STRUCTURE = "complex" STRUCTURE_CHOICES = ["complex", "guest", "host"] DEFAULT_STEP = False class ThresholdFitness(Fitness): - def __init__(self, threshold=THRESHOLD, structure="complex", step=False, **kwargs): + def __init__( + self, + threshold=THRESHOLD, + 
threshold_catan=THRESHOLD_CATAN, + structure="complex", + step=False, + **kwargs, + ): """Fitness is positive if the minimum distance is above the given threshold. @@ -23,6 +31,7 @@ def __init__(self, threshold=THRESHOLD, structure="complex", step=False, **kwarg """ super().__init__() self.threshold = threshold + self.threshold_catan = threshold_catan self.step = step if structure not in STRUCTURE_CHOICES: @@ -39,6 +48,12 @@ def add_arguments(parser): help="threshold for distance calculations (default: %(default)s)", default=THRESHOLD, ) + parser.add_argument( + "--threshold_catan", + type=float, + help="threshold for cation-anion distance calculations (default: %(default)s)", + default=THRESHOLD_CATAN, + ) parser.add_argument( "--structure", type=str, @@ -288,14 +303,14 @@ def get_catan_distances( distances_catan.append(distances_cation_anion) if ( - any(dist < 2.0 for sublist in distances_catan for dist in sublist) + any( + dist < self.threshold_catan + for sublist in distances_catan + for dist in sublist + ) and self.normalize(self.get_distances(complex).min() - self.threshold) > 0 ): - print("Optimal distance found! Aborting the run") - print( - "Distances between cation and acid oxygens are:", - distances_catan, - ) + print("Optimal cation-anion distance found! 
Aborting the run") return True, distances_catan else: From 887d6eae2733a07ff1adca69badd1f5ecaee0542 Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Sat, 1 Jun 2024 13:53:29 -0400 Subject: [PATCH 05/13] only cation-anion distance function and threshold class remain, cation-anion index identifiers moved out from VOID and parsed as external arguments --- VOID/dockers/base.py | 10 +- VOID/dockers/success.py | 8 +- VOID/fitness/threshold.py | 273 +++++--------------------------------- 3 files changed, 44 insertions(+), 247 deletions(-) diff --git a/VOID/dockers/base.py b/VOID/dockers/base.py index 83a32df..8be2682 100644 --- a/VOID/dockers/base.py +++ b/VOID/dockers/base.py @@ -50,18 +50,20 @@ def new_guest(self, newcoords=None): if newcoords is None: return self.guest.copy() - return Molecule(species=self.guest.species, coords=newcoords,) + return Molecule( + species=self.guest.species, + coords=newcoords, + ) def create_new_complex(self, host_coords, guest_coords): return Complex( self.new_host(newcoords=host_coords), self.new_guest(newcoords=guest_coords), - add_transform=False + add_transform=False, ) def dock(self, attempts: int) -> List[Complex]: - """Docks the guest into the host. 
- """ + """Docks the guest into the host.""" complexes = [] for point in self.sampler.get_points(self.host): complexes += self.dock_at_point(point, attempts) diff --git a/VOID/dockers/success.py b/VOID/dockers/success.py index 9cbffcd..e024eae 100644 --- a/VOID/dockers/success.py +++ b/VOID/dockers/success.py @@ -17,8 +17,7 @@ def dock_at_point(self, point, attempts): for trial in range(attempts): cpx = self.create_new_complex( - host_coords=hcoords, - guest_coords=self.rotate_guest() + host_coords=hcoords, guest_coords=self.rotate_guest() ) if self.fitness(cpx) >= 0: @@ -30,7 +29,9 @@ def dock_at_point(self, point, attempts): class SuccessMonteCarloDocker(MonteCarloDocker): PARSER_NAME = "mcsuccess" - HELP = "Docks guests to host until a successful docking is found (Monte Carlo version)" + HELP = ( + "Docks guests to host until a successful docking is found (Monte Carlo version)" + ) def dock(self, attempts): cpx = Complex(self.host.copy(), self.guest.copy()) @@ -40,6 +41,7 @@ def dock(self, attempts): if self.fitness(cpx) >= 0: print(f"{trial + 1} attempts to success") + cpx = self.rescale(cpx) return [cpx] return [] diff --git a/VOID/fitness/threshold.py b/VOID/fitness/threshold.py index f68b5eb..9486599 100644 --- a/VOID/fitness/threshold.py +++ b/VOID/fitness/threshold.py @@ -61,6 +61,18 @@ def add_arguments(parser): help="threshold for distance calculations (default: %(default)s)", default=DEFAULT_STRUCTURE, ) + parser.add_argument( + "--cation_index", + type=int, + help="index for the atom holding the positive charge in the molecule (default: %(default)s)", + default=None, + ) + parser.add_argument( + "--acid_sites", + type=list, + help="list of indexes for the O atoms that hold a negative charge (default: %(default)s)", + default=None, + ) def get_distances(self, complex): if self.structure == "complex": @@ -77,244 +89,28 @@ def get_distances(self, complex): else: raise ValueError("structure type not supported") - def get_atomtypes_indexes(self, pose): - 
"""Collect all the atomtypes indexes in the structure + def get_cation_anion_distances(self, acid_sites, cation_index, distance_matrices): + """Get the distances between the cation and the anion sites. Args: - pose (structure): Structure object containing atomtype, xyz, and host/guest features + acid_sites (list): List of lists of anion indexes. + cation_index (int): Index of the cation. + distance_matrices (list): List of distance matrices. Returns: - atomtypes_indexes (dict): dictionary comprising information about each atomtype, its indexes and the "host"/"guest" nature - """ - atomtypes_indexes = {} - - for index, site in enumerate(pose): - atom_type = site.species_string - feature = site.label # labels the atomtype as host or guest - if atom_type not in atomtypes_indexes: - atomtypes_indexes[atom_type] = [] - atomtypes_indexes[atom_type].append((index, feature)) - - return atomtypes_indexes - - def find_cation_index(self, distance_matrices, atomtypes_indexes): - """Identify the cation position in the guest molecule. This function is intended for monocationic species. - - Args: - distance_matrices (list): List of distance matrix lists for each atom with respect to all other atoms. - atomtypes_indexes (dict): Dictionary comprising information about each atomtype, its indexes, and its "host"/"guest" nature. - - Raises: - ValueError: If the code cannot find the cation on the guest molecule. - - Returns: - int: Index corresponding to the carbon (C) or nitrogen (N) atom that hosts the positive charge on the molecule. 
- """ - - # First retrieves the indexes for each atomtype, more can be added if needed - carbon_indexes = [ - idx for idx, lbl in atomtypes_indexes.get("C", []) if lbl == "guest" - ] - nitrogen_indexes = [ - idx for idx, lbl in atomtypes_indexes.get("N", []) if lbl == "guest" - ] - oxygen_indexes = [ - idx for idx, lbl in atomtypes_indexes.get("O", []) if lbl == "guest" - ] - hydrogen_indexes = [ - idx for idx, lbl in atomtypes_indexes.get("H", []) if lbl == "guest" - ] - - # Checks the numbers of bonds for each C atom, takes into account double and triple bonds - for carbon_index in carbon_indexes: - bonds = 0 - # Check all the dsitances and bond types for C-H, C-C, C-N, C-O - for i, dist in enumerate(distance_matrices[carbon_index]): - if ( - i in carbon_indexes - or i in hydrogen_indexes - or i in nitrogen_indexes - or i in oxygen_indexes - ): - if 0 < dist < 1.15 and i in hydrogen_indexes: # exp C-H dist 1.09 A - bonds += 1 - elif ( - 1.5 < dist < 1.6 and i in carbon_indexes - ): # exp C-C dist 1.55 A - bonds += 1 - elif ( - 1.29 < dist < 1.39 and i in carbon_indexes - ): # exp C=C dist 1.34 A - bonds += 2 - elif ( - 1.15 < dist < 1.25 and i in carbon_indexes - ): # exp C≡C dist 1.20 A - bonds += 3 - elif ( - 1.38 < dist < 1.48 and i in nitrogen_indexes - ): # exp C-N dist 1.43 A - bonds += 1 - elif ( - 1.33 < dist < 1.43 and i in nitrogen_indexes - ): # exp C=N dist 1.38 A - bonds += 2 - elif ( - 1.11 < dist < 1.21 and i in nitrogen_indexes - ): # exp C≡N dist 1.16 A - bonds += 3 - elif ( - 1.38 < dist < 1.48 and i in oxygen_indexes - ): # exp C-O dist 1.43 A - bonds += 1 - elif ( - 1.18 < dist < 1.28 and i in oxygen_indexes - ): # exp C=O dist 1.28 A - bonds += 2 - elif ( - 1.08 < dist < 1.18 and i in oxygen_indexes - ): # exp C≡O dist 1.18 A - bonds += 3 - - # If a C atom has only 3 bonds, identifies this atom as the positive charge holder - if bonds == 3: - return carbon_index - - # Checks the numbers of bonds for each N atom, takes into account double and 
triple bonds - for nitrogen_index in nitrogen_indexes: - bonds = 0 - # Checks all distances and bond types for N-H, N-C, N-N and N-O - for i, dist in enumerate(distance_matrices[nitrogen_index]): - if ( - i in carbon_indexes - or i in hydrogen_indexes - or i in nitrogen_indexes - or i in oxygen_indexes - ): - if 0 < dist < 1.05 and i in hydrogen_indexes: # exp N-H dist 1.00 A - bonds += 1 - elif ( - 1.42 < dist < 1.52 and i in carbon_indexes - ): # exp N-N dist 1.47 A - bonds += 1 - elif ( - 1.19 < dist < 1.29 and i in carbon_indexes - ): # exp N=N dist 1.24 A - bonds += 2 - elif ( - 1.05 < dist < 1.15 and i in carbon_indexes - ): # exp N≡N dist 1.10 A - bonds += 3 - elif ( - 1.38 < dist < 1.48 and i in nitrogen_indexes - ): # exp C-N dist 1.43 A - bonds += 1 - elif ( - 1.33 < dist < 1.43 and i in nitrogen_indexes - ): # exp C=N dist 1.38 A - bonds += 2 - elif ( - 1.11 < dist < 1.21 and i in nitrogen_indexes - ): # exp C≡N dist 1.16 A - bonds += 3 - elif ( - 1.39 < dist < 1.49 and i in oxygen_indexes - ): # exp N-O dist 1.44 A - bonds += 1 - elif ( - 1.15 < dist < 1.25 and i in oxygen_indexes - ): # exp C=O dist 1.20 A - bonds += 2 - - if bonds == 4: - return nitrogen_index - - # If no cation index is found, raise an error with a custom message - raise ValueError( - "Cation index could not be found. Please check the molecule you are docking." - ) - - def find_acid_sites(self, distance_matrices, atomtypes_indexes): - """Identify the acid sites in the host structure. - - Args: - distance_matrices (list): List of distance matrix lists for each atom with respect to all other atoms. - atomtypes_indexes (dict): Dictionary containing information about each atomtype, its indexes, and its "host"/"guest" nature. - - Returns: - list: List of lists with 4 oxygen atom indexes attached to each acid site. These oxygen atoms account for the negative charge to be compensated by the cation. 
- list: List of aluminum (Al) indexes, whose bonded oxygens are not compensated by any proton. (Not used throughout the code at present.) - """ - - acid_oxygens = [] - acid_al_indexes = [] - - host_oxygens = [ - idx for idx, lbl in atomtypes_indexes.get("O", []) if lbl == "host" - ] - host_aluminum = [ - idx for idx, lbl in atomtypes_indexes.get("Al", []) if lbl == "host" - ] - ## Room to add more metals if needed for docking to metal slides or so - - # Checks every Al atom present on the host structure - for al_index in host_aluminum: - # gets the 4 closest atoms to it (hence the bonded ones), Al-O dist ~1.79 - candidate_oxygens = [ - dist_index - for dist_index, dist in enumerate(distance_matrices[al_index]) - if 0 < dist < 1.9 and dist_index in host_oxygens - ] - # if there are 4 bonds and any of the 4 oxygens has an H bonded to it - # Then the 4 oxygens are considered acid sites and can form a bond with the cation - if len(candidate_oxygens) == 4 and all( - all( - not (bond_dist < 1.10 and bond_dist != 0.0) - for bond_dist in distance_matrices[ox_index] - ) - for ox_index in candidate_oxygens - ): # 1.10 accounts for O-H bond - acid_oxygens.append(candidate_oxygens) - acid_al_indexes.append(al_index) - - return acid_oxygens, acid_al_indexes - - def get_catan_distances( - self, acid_oxygens, cation_index, distance_matrices, complex - ): - """Check the distances between the cation and anion for different acid sites in the zeolite. - - Args: - acid_oxygens (list): List of lists with 4 oxygen atom indexes attached to each acid site; these oxygen atoms account for the negative charge to be compensated by the cation. - cation_index (int): Index corresponding to the carbon (C) or nitrogen (N) atom that hosts the positive charge on the molecule. - distance_matrices (list): List of distance matrix lists for each atom with respect to all other atoms. - complex (structure): Structure object representing the host-guest complex. 
- Returns: - bool: True if both requirements are met; False if either of the requirements isn't met. - list: List of distance lists between the cation index and the 4 acid oxygens corresponding to each acid Al. + list: List of lists of distances between the cation and the anion sites. """ distances_catan = [] - for acid_al in acid_oxygens: + for acid_al in acid_sites: distances_cation_anion = [ distance_matrices[cation_index][ox_index] for ox_index in acid_al ] distances_catan.append(distances_cation_anion) - if ( - any( - dist < self.threshold_catan - for sublist in distances_catan - for dist in sublist - ) - and self.normalize(self.get_distances(complex).min() - self.threshold) > 0 - ): - print("Optimal cation-anion distance found! Aborting the run") - return True, distances_catan - - else: - return False, distances_catan + return distances_catan def normalize(self, value): if self.step: @@ -341,25 +137,22 @@ def __call__(self, complex): complex (structure): The host-guest complex object containing the host and guest molecules. Returns: - int: 1 if both MinDistanceFitness and MinDistanceCationAnionFitness criteria are met. - float: Negative infinity (-np.inf) if either MinDistanceFitness or MinDistanceCationAnionFitness criteria are not met. + float: The score of the docking process. Returns normalized minimum distance if the optimal cation-anion distance is found, otherwise returns negative infinity. 
""" - - pose = complex.pose - distance_matrices = complex.pose.distance_matrix - atomtypes_indexes = self.get_atomtypes_indexes(pose) - - cation_index = self.find_cation_index(distance_matrices, atomtypes_indexes) - acid_sites, acid_al_indexes = self.find_acid_sites( - distance_matrices, atomtypes_indexes + cation_anion_distances = self.get_cation_anion_distances( + self.acid_sites, self.cation_index, complex.pose.distance_matrix ) - converged, distances = self.get_catan_distances( - acid_sites, cation_index, distance_matrices, complex - ) - - if converged: - return 1 + if ( + any( + distance < self.threshold_catan + for distance_list in cation_anion_distances + for distance in distance_list + ) + and self.normalize(self.get_distances(complex).min() - self.threshold) > 0 + ): + print("Optimal cation-anion distance found! Aborting the run") + return self.normalize(self.get_distances(complex).min()) else: return -np.inf From 6629f9bb68150188de4fa6c605df6888373be906 Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Sat, 1 Jun 2024 16:42:17 -0400 Subject: [PATCH 06/13] parses external arguments properly, though the class inheritance for MinDistanceCationFitness can be cleaner --- VOID/fitness/threshold.py | 51 +++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/VOID/fitness/threshold.py b/VOID/fitness/threshold.py index 9486599..585cac6 100644 --- a/VOID/fitness/threshold.py +++ b/VOID/fitness/threshold.py @@ -1,15 +1,15 @@ import numpy as np - +import argparse from .base import Fitness - -from pymatgen.core.sites import Site - +import ipdb THRESHOLD = 1.5 THRESHOLD_CATAN = 2.0 DEFAULT_STRUCTURE = "complex" STRUCTURE_CHOICES = ["complex", "guest", "host"] DEFAULT_STEP = False +CATION_INDEX = None +ACID_SITES = None class ThresholdFitness(Fitness): @@ -17,6 +17,8 @@ def __init__( self, threshold=THRESHOLD, threshold_catan=THRESHOLD_CATAN, + cation_index=CATION_INDEX, + acid_sites=ACID_SITES, structure="complex", 
step=False, **kwargs, @@ -65,13 +67,13 @@ def add_arguments(parser): "--cation_index", type=int, help="index for the atom holding the positive charge in the molecule (default: %(default)s)", - default=None, + default=CATION_INDEX, ) parser.add_argument( "--acid_sites", type=list, help="list of indexes for the O atoms that hold a negative charge (default: %(default)s)", - default=None, + default=ACID_SITES, ) def get_distances(self, complex): @@ -102,15 +104,11 @@ def get_cation_anion_distances(self, acid_sites, cation_index, distance_matrices list: List of lists of distances between the cation and the anion sites. """ - distances_catan = [] - for acid_al in acid_sites: - distances_cation_anion = [ - distance_matrices[cation_index][ox_index] for ox_index in acid_al - ] - - distances_catan.append(distances_cation_anion) - - return distances_catan + distances_cation_anion = [ + [distance_matrices[cation_index][anion_index] for anion_index in anion_list] + for anion_list in acid_sites + ] + return distances_cation_anion def normalize(self, value): if self.step: @@ -130,6 +128,24 @@ class MinDistanceCationAnionFitness(ThresholdFitness): PARSER_NAME = "min_catan_distance" HELP = "Complexes have positive score if the minimum distance between host anion and guest cation is below the given threshold plus Complexes have positive score if the minimum distance between host and guest is above the given threshold" + def __init__( + self, + threshold, + threshold_catan, + cation_index, + acid_sites, + structure, + step, + **kwargs, + ): + super().__init__(**kwargs) + self.threshold = threshold + self.threshold_catan = threshold_catan + self.cation_index = cation_index + self.acid_sites = acid_sites + self.structure = structure + self.step = step + def __call__(self, complex): """Docks a guest cation into a host with anionic spots while ensuring a minimal distance between them. @@ -139,8 +155,11 @@ def __call__(self, complex): Returns: float: The score of the docking process. 
Returns normalized minimum distance if the optimal cation-anion distance is found, otherwise returns negative infinity. """ + cation_anion_distances = self.get_cation_anion_distances( - self.acid_sites, self.cation_index, complex.pose.distance_matrix + self.acid_sites, + self.cation_index, + complex.pose.distance_matrix, ) if ( From 03e9feaa6dbdee13dd5e18a375ff6dfd77a8d80d Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Sat, 1 Jun 2024 17:06:20 -0400 Subject: [PATCH 07/13] Class inheritance and argument parsing distributed between ThresholdFitness and MinCationAnionFitness so --threshold_catan, --cation_index and --acid_sites will be ignored if any other kind of Fitness is called --- VOID/dockers/success.py | 57 +++++++++++++++++++++++++++++++++++ VOID/fitness/threshold.py | 62 ++++++++++++++++++--------------------- 2 files changed, 86 insertions(+), 33 deletions(-) diff --git a/VOID/dockers/success.py b/VOID/dockers/success.py index e024eae..ee63162 100644 --- a/VOID/dockers/success.py +++ b/VOID/dockers/success.py @@ -1,5 +1,8 @@ import numpy as np +from pymatgen.core.sites import Site +from pymatgen.core import Lattice, Structure + from VOID.structure import Complex from VOID.dockers.serial import SerialDocker from VOID.dockers.mcdocker import MonteCarloDocker @@ -45,3 +48,57 @@ def dock(self, attempts): return [cpx] return [] + + def rescale(complex): + """Rescale the complex to the 0-1 range so results can be visualized in direct and xyz format. + + Args: + complex (Complex): The host-guest complex object containing the host and guest molecules. + + Returns: + Complex: The rescaled host-guest complex object. 
+ """ + lattice = complex.pose.lattice + frac_coords = [] + species_list = [] + site_labels = [] + + for site in complex.pose: + site_labels.append(site.label) + species_list.append(site.species) + coords = ( + site.frac_coords + if site.label == "host" + else np.mod(site.frac_coords, 1.0) + ) + frac_coords.append(coords) + + site_properties = {"label": site_labels} + + updated_structure = Structure( + lattice, species_list, frac_coords, site_properties=site_properties + ) + + num_host_atoms = len(complex.host) + + species = updated_structure.species + cart_coords = updated_structure.cart_coords + + # Update the host and guest with the rescaled 0-1 species and coordinates + complex.host = Structure( + lattice=complex.host.lattice, + species=species[:num_host_atoms], + coords=cart_coords[:num_host_atoms], + coords_are_cartesian=True, + site_properties=complex.host.site_properties, + ) + + complex.guest = Structure( + lattice=complex.host.lattice, + species=species[num_host_atoms:], + coords=cart_coords[num_host_atoms:], + coords_are_cartesian=True, + site_properties=complex.guest.site_properties, + ) + + return complex diff --git a/VOID/fitness/threshold.py b/VOID/fitness/threshold.py index 585cac6..11273d0 100644 --- a/VOID/fitness/threshold.py +++ b/VOID/fitness/threshold.py @@ -16,9 +16,6 @@ class ThresholdFitness(Fitness): def __init__( self, threshold=THRESHOLD, - threshold_catan=THRESHOLD_CATAN, - cation_index=CATION_INDEX, - acid_sites=ACID_SITES, structure="complex", step=False, **kwargs, @@ -33,7 +30,6 @@ def __init__( """ super().__init__() self.threshold = threshold - self.threshold_catan = threshold_catan self.step = step if structure not in STRUCTURE_CHOICES: @@ -50,12 +46,6 @@ def add_arguments(parser): help="threshold for distance calculations (default: %(default)s)", default=THRESHOLD, ) - parser.add_argument( - "--threshold_catan", - type=float, - help="threshold for cation-anion distance calculations (default: %(default)s)", - 
default=THRESHOLD_CATAN, - ) parser.add_argument( "--structure", type=str, @@ -63,18 +53,6 @@ def add_arguments(parser): help="threshold for distance calculations (default: %(default)s)", default=DEFAULT_STRUCTURE, ) - parser.add_argument( - "--cation_index", - type=int, - help="index for the atom holding the positive charge in the molecule (default: %(default)s)", - default=CATION_INDEX, - ) - parser.add_argument( - "--acid_sites", - type=list, - help="list of indexes for the O atoms that hold a negative charge (default: %(default)s)", - default=ACID_SITES, - ) def get_distances(self, complex): if self.structure == "complex": @@ -130,21 +108,39 @@ class MinDistanceCationAnionFitness(ThresholdFitness): def __init__( self, - threshold, - threshold_catan, - cation_index, - acid_sites, - structure, - step, - **kwargs, + threshold=THRESHOLD, + threshold_catan=THRESHOLD_CATAN, + structure=DEFAULT_STRUCTURE, + cation_index=None, + acid_sites=None, ): - super().__init__(**kwargs) - self.threshold = threshold + super().__init__(threshold) self.threshold_catan = threshold_catan self.cation_index = cation_index self.acid_sites = acid_sites - self.structure = structure - self.step = step + + @staticmethod + def add_arguments(parser): + ThresholdFitness.add_arguments(parser) + + parser.add_argument( + "--threshold_catan", + type=float, + help="threshold for cation-anion distance calculations (default: %(default)s)", + default=THRESHOLD_CATAN, + ) + parser.add_argument( + "--cation_index", + type=int, + help="index for the atom holding the positive charge in the molecule (default: %(default)s)", + default=CATION_INDEX, + ) + parser.add_argument( + "--acid_sites", + type=list, + help="list of indexes for the O atoms that hold a negative charge (default: %(default)s)", + default=ACID_SITES, + ) def __call__(self, complex): """Docks a guest cation into a host with anionic spots while ensuring a minimal distance between them. 
From fd4ec78c2ce7d17ad6373dc076f5b6d0cd2b8008 Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Sat, 1 Jun 2024 18:30:26 -0400 Subject: [PATCH 08/13] final ammendments --- VOID/dockers/success.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VOID/dockers/success.py b/VOID/dockers/success.py index ee63162..53fe41e 100644 --- a/VOID/dockers/success.py +++ b/VOID/dockers/success.py @@ -49,7 +49,7 @@ def dock(self, attempts): return [] - def rescale(complex): + def rescale(self, complex): """Rescale the complex to the 0-1 range so results can be visualized in direct and xyz format. Args: From 705377ad45913190290f63613307dd2621a7778b Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Tue, 4 Jun 2024 14:20:32 -0400 Subject: [PATCH 09/13] ammendments so cation_indexes and get_cation_anion_distances work for multicationic molecules --- VOID/fitness/threshold.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/VOID/fitness/threshold.py b/VOID/fitness/threshold.py index 11273d0..a348d09 100644 --- a/VOID/fitness/threshold.py +++ b/VOID/fitness/threshold.py @@ -8,7 +8,7 @@ DEFAULT_STRUCTURE = "complex" STRUCTURE_CHOICES = ["complex", "guest", "host"] DEFAULT_STEP = False -CATION_INDEX = None +CATION_INDEXES = None ACID_SITES = None @@ -69,7 +69,7 @@ def get_distances(self, complex): else: raise ValueError("structure type not supported") - def get_cation_anion_distances(self, acid_sites, cation_index, distance_matrices): + def get_cation_anion_distances(self, acid_sites, cation_indexes, distance_matrices): """Get the distances between the cation and the anion sites. Args: @@ -82,10 +82,14 @@ def get_cation_anion_distances(self, acid_sites, cation_index, distance_matrices list: List of lists of distances between the cation and the anion sites. 
""" - distances_cation_anion = [ - [distance_matrices[cation_index][anion_index] for anion_index in anion_list] - for anion_list in acid_sites - ] + distances_cation_anion = [] + for cation in cation_indexes: + distances = [ + distance_matrices[cation][anion_index] + for anion_list in acid_sites + for anion_index in anion_list + ] + distances_cation_anion.append(distances) return distances_cation_anion def normalize(self, value): @@ -111,12 +115,12 @@ def __init__( threshold=THRESHOLD, threshold_catan=THRESHOLD_CATAN, structure=DEFAULT_STRUCTURE, - cation_index=None, + cation_indexes=None, acid_sites=None, ): super().__init__(threshold) self.threshold_catan = threshold_catan - self.cation_index = cation_index + self.cation_indexes = cation_indexes self.acid_sites = acid_sites @staticmethod @@ -130,10 +134,10 @@ def add_arguments(parser): default=THRESHOLD_CATAN, ) parser.add_argument( - "--cation_index", - type=int, - help="index for the atom holding the positive charge in the molecule (default: %(default)s)", - default=CATION_INDEX, + "--cation_indexes", + type=list, + help="indexes for the atoms holding a positive charge in the molecule (default: %(default)s)", + default=CATION_INDEXES, ) parser.add_argument( "--acid_sites", @@ -154,7 +158,7 @@ def __call__(self, complex): cation_anion_distances = self.get_cation_anion_distances( self.acid_sites, - self.cation_index, + self.cation_indexes, complex.pose.distance_matrix, ) From 6c8d1529d5a7c8af40d8a03eec67b067b332f74a Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Mon, 18 Nov 2024 17:06:21 -0500 Subject: [PATCH 10/13] Cation-Anion docking example folder added and explained at examples/README.md --- examples/Cation_Anion/.gitignore | 1 + examples/Cation_Anion/DEB+.xyz | 25 +++ examples/Cation_Anion/job.sh | 17 ++ examples/Cation_Anion/structure.cif | 249 ++++++++++++++++++++++++++++ examples/README.md | 1 + 5 files changed, 293 insertions(+) create mode 100644 examples/Cation_Anion/.gitignore create mode 
100644 examples/Cation_Anion/DEB+.xyz create mode 100755 examples/Cation_Anion/job.sh create mode 100644 examples/Cation_Anion/structure.cif diff --git a/examples/Cation_Anion/.gitignore b/examples/Cation_Anion/.gitignore new file mode 100644 index 0000000..3f91e5b --- /dev/null +++ b/examples/Cation_Anion/.gitignore @@ -0,0 +1 @@ +mcdocked vdocked diff --git a/examples/Cation_Anion/DEB+.xyz b/examples/Cation_Anion/DEB+.xyz new file mode 100644 index 0000000..20ce3f4 --- /dev/null +++ b/examples/Cation_Anion/DEB+.xyz @@ -0,0 +1,25 @@ +23 + +C 2.8824 -0.9488 0.572 +C 1.4172 -0.9826 0.3101 +C 0.617 0.2149 0.0164 +C 1.1534 1.491 0.2607 +C 0.415 2.6507 0.0142 +C -0.8793 2.5564 -0.4785 +C -1.4323 1.3024 -0.7264 +C -0.6986 0.1219 -0.4908 +C -1.3828 -1.195 -0.7917 +C -2.1371 -1.7357 0.4145 +H 3.087 -0.5902 1.5851 +H 3.2955 -1.9588 0.4837 +H 3.4081 -0.3169 -0.1502 +H 0.9012 -1.9173 0.4953 +H 2.1576 1.6066 0.6605 +H 0.853 3.625 0.2135 +H -1.4591 3.4554 -0.6683 +H -2.4492 1.2451 -1.1101 +H -0.6596 -1.934 -1.154 +H -2.0863 -1.0578 -1.6232 +H -1.4677 -1.912 1.2627 +H -2.9137 -1.0361 0.742 +H -2.6216 -2.6843 0.1624 diff --git a/examples/Cation_Anion/job.sh b/examples/Cation_Anion/job.sh new file mode 100755 index 0000000..d5a9a27 --- /dev/null +++ b/examples/Cation_Anion/job.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# Runs until the first success is found Also with Monte Carlo docking +echo "" +echo "In this example we must supply the --cation_indexes (start counting after the framework, remember 0 indexing)" +echo "and the --acid_sites (oxygens bonded to the heteroatom)" +echo "" +echo "--acid_sites doesn't need all the acid sites present in the system, you can choose to sample for a preferred one if needed" +echo "" +echo "This example runs the docking of a DEB+ molecule into an Al-UTL zeolite framework" +echo "" +echo "If job fails to reach a final pose you can tune --threshold_catan, --threshold and --attempts parameters" +echo "" +echo "Running Monte Carlo docking" +echo "" 
+python3 ../../scripts/dock.py structure.cif DEB+.xyz -d mcsuccess -s random -f min_catan_distance -o mcdocked --threshold_catan 3.1 --threshold 1.5 --attempts 20000 --cation_indexes 225 --acid_sites 84,92,90,96 +echo "Final pose save to mcdocked folder" diff --git a/examples/Cation_Anion/structure.cif b/examples/Cation_Anion/structure.cif new file mode 100644 index 0000000..e2d4169 --- /dev/null +++ b/examples/Cation_Anion/structure.cif @@ -0,0 +1,249 @@ +data_image0 +_chemical_formula_structural O148AlSi75 +_chemical_formula_sum "O148 Al1 Si75" +_cell_length_a 28.996 +_cell_length_b 13.968 +_cell_length_c 12.449 +_cell_angle_alpha 90 +_cell_angle_beta 104.91 +_cell_angle_gamma 90 + +_space_group_name_H-M_alt "P 1" +_space_group_IT_number 1 + +loop_ + _space_group_symop_operation_xyz + 'x, y, z' + +loop_ + _atom_site_type_symbol + _atom_site_label + _atom_site_symmetry_multiplicity + _atom_site_fract_x + _atom_site_fract_y + _atom_site_fract_z + _atom_site_occupancy + O O1 1.0 0.41940 0.67970 0.74110 1.0000 + O O2 1.0 0.91940 0.17970 0.74110 1.0000 + O O3 1.0 0.58060 0.67970 0.25890 1.0000 + O O4 1.0 0.08060 0.17970 0.25890 1.0000 + O O5 1.0 0.58060 0.32030 0.25890 1.0000 + O O6 1.0 0.08060 0.82030 0.25890 1.0000 + O O7 1.0 0.41940 0.32030 0.74110 1.0000 + O O8 1.0 0.91940 0.82030 0.74110 1.0000 + O O9 1.0 0.50760 0.62900 0.84150 1.0000 + O O10 1.0 0.00760 0.12900 0.84150 1.0000 + O O11 1.0 0.49240 0.62900 0.15850 1.0000 + O O12 1.0 0.99240 0.12900 0.15850 1.0000 + O O13 1.0 0.49240 0.37100 0.15850 1.0000 + O O14 1.0 0.99240 0.87100 0.15850 1.0000 + O O15 1.0 0.50760 0.37100 0.84150 1.0000 + O O16 1.0 0.00760 0.87100 0.84150 1.0000 + O O17 1.0 0.43940 0.50000 0.79880 1.0000 + O O18 1.0 0.93940 0.00000 0.79880 1.0000 + O O19 1.0 0.56060 0.50000 0.20120 1.0000 + O O20 1.0 0.06060 0.00000 0.20120 1.0000 + O O21 1.0 0.44140 0.63100 0.95140 1.0000 + O O22 1.0 0.94140 0.13100 0.95140 1.0000 + O O23 1.0 0.55860 0.63100 0.04860 1.0000 + O O24 1.0 0.05860 0.13100 0.04860 
1.0000 + O O25 1.0 0.55860 0.36900 0.04860 1.0000 + O O26 1.0 0.05860 0.86900 0.04860 1.0000 + O O27 1.0 0.44140 0.36900 0.95140 1.0000 + O O28 1.0 0.94140 0.86900 0.95140 1.0000 + O O29 1.0 0.40230 0.68090 0.11120 1.0000 + O O30 1.0 0.90230 0.18090 0.11120 1.0000 + O O31 1.0 0.59770 0.68090 0.88880 1.0000 + O O32 1.0 0.09770 0.18090 0.88880 1.0000 + O O33 1.0 0.59770 0.31910 0.88880 1.0000 + O O34 1.0 0.09770 0.81910 0.88880 1.0000 + O O35 1.0 0.40230 0.31910 0.11120 1.0000 + O O36 1.0 0.90230 0.81910 0.11120 1.0000 + O O37 1.0 0.42520 0.50000 0.08900 1.0000 + O O38 1.0 0.92520 0.00000 0.08900 1.0000 + O O39 1.0 0.57480 0.50000 0.91100 1.0000 + O O40 1.0 0.07480 1.00000 0.91100 1.0000 + O O41 1.0 0.71730 0.50000 0.82440 1.0000 + O O42 1.0 0.28270 0.50000 0.17560 1.0000 + O O43 1.0 0.29570 0.90600 0.83550 1.0000 + O O44 1.0 0.79570 0.40600 0.83550 1.0000 + O O45 1.0 0.70430 0.90600 0.16450 1.0000 + O O46 1.0 0.20430 0.40600 0.16450 1.0000 + O O47 1.0 0.70430 0.09400 0.16450 1.0000 + O O48 1.0 0.20430 0.59400 0.16450 1.0000 + O O49 1.0 0.29570 0.09400 0.83550 1.0000 + O O50 1.0 0.79570 0.59400 0.83550 1.0000 + O O51 1.0 0.75460 0.50000 0.65120 1.0000 + O O52 1.0 0.24540 0.50000 0.34880 1.0000 + O O53 1.0 0.19410 0.40480 0.78530 1.0000 + O O54 1.0 0.69410 0.90480 0.78530 1.0000 + O O55 1.0 0.80590 0.40480 0.21470 1.0000 + O O56 1.0 0.30590 0.90480 0.21470 1.0000 + O O57 1.0 0.80590 0.59520 0.21470 1.0000 + O O58 1.0 0.30590 0.09520 0.21470 1.0000 + O O59 1.0 0.19410 0.59520 0.78530 1.0000 + O O60 1.0 0.69410 0.09520 0.78530 1.0000 + O O61 1.0 0.27080 0.50000 0.77760 1.0000 + O O62 1.0 0.77080 0.00000 0.77760 1.0000 + O O63 1.0 0.72920 0.50000 0.22240 1.0000 + O O64 1.0 0.22920 0.00000 0.22240 1.0000 + O O65 1.0 0.24180 0.50000 0.96080 1.0000 + O O66 1.0 0.74180 0.00000 0.96080 1.0000 + O O67 1.0 0.75820 0.50000 0.03920 1.0000 + O O68 1.0 0.25820 1.00000 0.03920 1.0000 + O O69 1.0 0.27360 0.50000 0.56640 1.0000 + O O70 1.0 0.77360 0.00000 0.56640 1.0000 + O O71 1.0 
0.72640 0.50000 0.43360 1.0000 + O O72 1.0 0.22640 1.00000 0.43360 1.0000 + O O73 1.0 0.33790 0.40620 0.71610 1.0000 + O O74 1.0 0.83790 0.90620 0.71610 1.0000 + O O75 1.0 0.66210 0.40620 0.28390 1.0000 + O O76 1.0 0.16210 0.90620 0.28390 1.0000 + O O77 1.0 0.66210 0.59380 0.28390 1.0000 + O O78 1.0 0.16210 0.09380 0.28390 1.0000 + O O79 1.0 0.33790 0.59380 0.71610 1.0000 + O O80 1.0 0.83790 0.09380 0.71610 1.0000 + O O81 1.0 0.19670 0.40550 0.46700 1.0000 + O O82 1.0 0.69670 0.90550 0.46700 1.0000 + O O83 1.0 0.80330 0.40550 0.53300 1.0000 + O O84 1.0 0.30330 0.90550 0.53300 1.0000 + O O85 1.0 0.80330 0.59450 0.53300 1.0000 + O O86 1.0 0.30330 0.09450 0.53300 1.0000 + O O87 1.0 0.19670 0.59450 0.46700 1.0000 + O O88 1.0 0.69670 0.09450 0.46700 1.0000 + O O89 1.0 0.32880 0.72540 0.19190 1.0000 + O O90 1.0 0.82880 0.22540 0.19190 1.0000 + O O91 1.0 0.67120 0.72540 0.80810 1.0000 + O O92 1.0 0.17120 0.22540 0.80810 1.0000 + O O93 1.0 0.67120 0.27460 0.80810 1.0000 + O O94 1.0 0.17120 0.77460 0.80810 1.0000 + O O95 1.0 0.32880 0.27460 0.19190 1.0000 + O O96 1.0 0.82880 0.77460 0.19190 1.0000 + O O97 1.0 0.24010 0.76930 0.19020 1.0000 + O O98 1.0 0.74010 0.26930 0.19020 1.0000 + O O99 1.0 0.75990 0.76930 0.80980 1.0000 + O O100 1.0 0.25990 0.26930 0.80980 1.0000 + O O101 1.0 0.75990 0.23070 0.80980 1.0000 + O O102 1.0 0.25990 0.73070 0.80980 1.0000 + O O103 1.0 0.24010 0.23070 0.19020 1.0000 + O O104 1.0 0.74010 0.73070 0.19020 1.0000 + O O105 1.0 0.30850 0.78300 0.37510 1.0000 + O O106 1.0 0.80850 0.28300 0.37510 1.0000 + O O107 1.0 0.69150 0.78300 0.62490 1.0000 + O O108 1.0 0.19150 0.28300 0.62490 1.0000 + O O109 1.0 0.69150 0.21700 0.62490 1.0000 + O O110 1.0 0.19150 0.71700 0.62490 1.0000 + O O111 1.0 0.30850 0.21700 0.37510 1.0000 + O O112 1.0 0.80850 0.71700 0.37510 1.0000 + O O113 1.0 0.34340 0.73760 0.58370 1.0000 + O O114 1.0 0.84340 0.23760 0.58370 1.0000 + O O115 1.0 0.65660 0.73760 0.41630 1.0000 + O O116 1.0 0.15660 0.23760 0.41630 1.0000 + O O117 1.0 
0.65660 0.26240 0.41630 1.0000 + O O118 1.0 0.15660 0.76240 0.41630 1.0000 + O O119 1.0 0.34340 0.26240 0.58370 1.0000 + O O120 1.0 0.84340 0.76240 0.58370 1.0000 + O O121 1.0 0.25000 0.75000 0.50000 1.0000 + O O122 1.0 0.75000 0.25000 0.50000 1.0000 + O O123 1.0 0.75000 0.75000 0.50000 1.0000 + O O124 1.0 0.25000 0.25000 0.50000 1.0000 + O O125 1.0 0.34790 0.76700 0.79510 1.0000 + O O126 1.0 0.84790 0.26700 0.79510 1.0000 + O O127 1.0 0.65210 0.76700 0.20490 1.0000 + O O128 1.0 0.15210 0.26700 0.20490 1.0000 + O O129 1.0 0.65210 0.23300 0.20490 1.0000 + O O130 1.0 0.15210 0.73300 0.20490 1.0000 + O O131 1.0 0.34790 0.23300 0.79510 1.0000 + O O132 1.0 0.84790 0.73300 0.79510 1.0000 + O O133 1.0 0.32810 0.77530 0.98810 1.0000 + O O134 1.0 0.82810 0.27530 0.98810 1.0000 + O O135 1.0 0.67190 0.77530 0.01190 1.0000 + O O136 1.0 0.17190 0.27530 0.01190 1.0000 + O O137 1.0 0.67190 0.22470 0.01190 1.0000 + O O138 1.0 0.17190 0.72470 0.01190 1.0000 + O O139 1.0 0.32810 0.22470 0.98810 1.0000 + O O140 1.0 0.82810 0.72470 0.98810 1.0000 + O O141 1.0 0.32120 0.59430 0.03640 1.0000 + O O142 1.0 0.82120 0.09430 0.03640 1.0000 + O O143 1.0 0.67880 0.59430 0.96360 1.0000 + O O144 1.0 0.17880 0.09430 0.96360 1.0000 + O O145 1.0 0.67880 0.40570 0.96360 1.0000 + O O146 1.0 0.17880 0.90570 0.96360 1.0000 + O O147 1.0 0.32120 0.40570 0.03640 1.0000 + O O148 1.0 0.82120 0.90570 0.03640 1.0000 + Al Al1 1.0 0.45210 0.60990 0.83300 1.0000 + Si Si1 1.0 0.95210 0.10990 0.83300 1.0000 + Si Si2 1.0 0.54790 0.60990 0.16700 1.0000 + Si Si3 1.0 0.04790 0.10990 0.16700 1.0000 + Si Si4 1.0 0.54790 0.39010 0.16700 1.0000 + Si Si5 1.0 0.04790 0.89010 0.16700 1.0000 + Si Si6 1.0 0.45210 0.39010 0.83300 1.0000 + Si Si7 1.0 0.95210 0.89010 0.83300 1.0000 + Si Si8 1.0 0.44010 0.60990 0.07770 1.0000 + Si Si9 1.0 0.94010 0.10990 0.07770 1.0000 + Si Si10 1.0 0.55990 0.60990 0.92230 1.0000 + Si Si11 1.0 0.05990 0.10990 0.92230 1.0000 + Si Si12 1.0 0.55990 0.39010 0.92230 1.0000 + Si Si13 1.0 0.05990 0.89010 
0.92230 1.0000 + Si Si14 1.0 0.44010 0.39010 0.07770 1.0000 + Si Si15 1.0 0.94010 0.89010 0.07770 1.0000 + Si Si16 1.0 0.29540 0.79540 0.24220 1.0000 + Si Si17 1.0 0.79540 0.29540 0.24220 1.0000 + Si Si18 1.0 0.70460 0.79540 0.75780 1.0000 + Si Si19 1.0 0.20460 0.29540 0.75780 1.0000 + Si Si20 1.0 0.70460 0.20460 0.75780 1.0000 + Si Si21 1.0 0.20460 0.70460 0.75780 1.0000 + Si Si22 1.0 0.29540 0.20460 0.24220 1.0000 + Si Si23 1.0 0.79540 0.70460 0.24220 1.0000 + Si Si24 1.0 0.30100 0.79410 0.49820 1.0000 + Si Si25 1.0 0.80100 0.29410 0.49820 1.0000 + Si Si26 1.0 0.69900 0.79410 0.50180 1.0000 + Si Si27 1.0 0.19900 0.29410 0.50180 1.0000 + Si Si28 1.0 0.69900 0.20590 0.50180 1.0000 + Si Si29 1.0 0.19900 0.70590 0.50180 1.0000 + Si Si30 1.0 0.30100 0.20590 0.49820 1.0000 + Si Si31 1.0 0.80100 0.70590 0.49820 1.0000 + Si Si32 1.0 0.30720 0.79380 0.85690 1.0000 + Si Si33 1.0 0.80720 0.29380 0.85690 1.0000 + Si Si34 1.0 0.69280 0.79380 0.14310 1.0000 + Si Si35 1.0 0.19280 0.29380 0.14310 1.0000 + Si Si36 1.0 0.69280 0.20620 0.14310 1.0000 + Si Si37 1.0 0.19280 0.70620 0.14310 1.0000 + Si Si38 1.0 0.30720 0.20620 0.85690 1.0000 + Si Si39 1.0 0.80720 0.70620 0.85690 1.0000 + Si Si40 1.0 0.34530 0.69500 0.08270 1.0000 + Si Si41 1.0 0.84530 0.19500 0.08270 1.0000 + Si Si42 1.0 0.65470 0.69500 0.91730 1.0000 + Si Si43 1.0 0.15470 0.19500 0.91730 1.0000 + Si Si44 1.0 0.65470 0.30500 0.91730 1.0000 + Si Si45 1.0 0.15470 0.80500 0.91730 1.0000 + Si Si46 1.0 0.34530 0.30500 0.08270 1.0000 + Si Si47 1.0 0.84530 0.80500 0.08270 1.0000 + Si Si48 1.0 0.36240 0.69570 0.70760 1.0000 + Si Si49 1.0 0.86240 0.19570 0.70760 1.0000 + Si Si50 1.0 0.63760 0.69570 0.29240 1.0000 + Si Si51 1.0 0.13760 0.19570 0.29240 1.0000 + Si Si52 1.0 0.63760 0.30430 0.29240 1.0000 + Si Si53 1.0 0.13760 0.80430 0.29240 1.0000 + Si Si54 1.0 0.36240 0.30430 0.70760 1.0000 + Si Si55 1.0 0.86240 0.80430 0.70760 1.0000 + Si Si56 1.0 0.26600 0.00000 0.78470 1.0000 + Si Si57 1.0 0.76600 0.50000 0.78470 1.0000 + Si 
Si58 1.0 0.73400 0.00000 0.21530 1.0000 + Si Si59 1.0 0.23400 0.50000 0.21530 1.0000 + Si Si60 1.0 0.22470 0.50000 0.82700 1.0000 + Si Si61 1.0 0.77530 0.50000 0.17300 1.0000 + Si Si62 1.0 0.27530 0.00000 0.17300 1.0000 + Si Si63 1.0 0.72470 0.00000 0.82700 1.0000 + Si Si64 1.0 0.30460 0.50000 0.69360 1.0000 + Si Si65 1.0 0.69540 0.50000 0.30640 1.0000 + Si Si66 1.0 0.19540 0.00000 0.30640 1.0000 + Si Si67 1.0 0.80460 0.00000 0.69360 1.0000 + Si Si68 1.0 0.22780 0.50000 0.46120 1.0000 + Si Si69 1.0 0.77220 0.50000 0.53880 1.0000 + Si Si70 1.0 0.27220 0.00000 0.53880 1.0000 + Si Si71 1.0 0.72780 0.00000 0.46120 1.0000 + Si Si72 1.0 0.29210 0.50000 0.05360 1.0000 + Si Si73 1.0 0.70790 0.50000 0.94640 1.0000 + Si Si74 1.0 0.20790 0.00000 0.94640 1.0000 + Si Si75 1.0 0.79210 0.00000 0.05360 1.0000 diff --git a/examples/README.md b/examples/README.md index f18ddf6..0f619dc 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,5 +7,6 @@ The following examples are currently available in this repository: 3. [Docking triethylamine to MOR zeolite](MOR_loading): several triethylamine molecules are docked to MOR zeolite with Voronoi batch docker. 4. [Docking benzene to MOF-5](MOF-5): several benzene molecules are docked to MOF-5 using the Voronoi batched docker. The MOF-5 structure was retrieved from the Cambridge Structural Database (ID [SAHYIK](https://www.ccdc.cam.ac.uk/structures/search?identifier=SAHYIK)) 5. [Docking water to a Ni(111) surface](Ni111): one water molecule is docked to a Ni(111) surface using the Gaussian target fitness function. The Ni(111) surface structure was retrieved from the [Materials Project](https://materialsproject.org) (ID [mp-23](https://materialsproject.org/materials/mp-23/surfaces/[1,%201,%201]/cif)) +6. [Docking Diethylbenzenium cation (DEB+) into an acid UTL zeolite](Cation_Anion): One DEB+ molecule is docked to a UTL framework containing one Al atom, hence 4 acid sites. 
The minimum cation-anion fitness ensures a close positioning of the docked molecule with respect to the acid sites of the host. Each example has a `job.sh` script file showing how to run the docker. From e6083b675f751400719f74d5432a06d0597641ff Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Mon, 18 Nov 2024 17:39:39 -0500 Subject: [PATCH 11/13] nothing changed just formatting --- VOID/utils/setup.py | 14 ++------------ examples/Cation_Anion/job.sh | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/VOID/utils/setup.py b/VOID/utils/setup.py index dc5fcca..65ae601 100644 --- a/VOID/utils/setup.py +++ b/VOID/utils/setup.py @@ -13,11 +13,7 @@ def get_module_classes(self, module): def get_docker_kwargs(self, docker_class): if self.args["docker"] in ["mcdocker", "mcsuccess"]: - return { - k: self.args[k] - for k in ["temperature", "temperature_profile"] - if k in self.args - } + return {k: self.args[k] for k in ["temperature", "temperature_profile"] if k in self.args} return {} @@ -29,13 +25,7 @@ def get_docker(self): fitness = self.get_fitness() host, guest = self.get_structures() - docker = cls( - host=host, - guest=guest, - sampler=sampler, - fitness=fitness, - **self.get_docker_kwargs(cls) - ) + docker = cls(host=host, guest=guest, sampler=sampler, fitness=fitness, **self.get_docker_kwargs(cls)) return docker diff --git a/examples/Cation_Anion/job.sh b/examples/Cation_Anion/job.sh index d5a9a27..8c81749 100755 --- a/examples/Cation_Anion/job.sh +++ b/examples/Cation_Anion/job.sh @@ -13,5 +13,5 @@ echo "If job fails to reach a final pose you can tune --threshold_catan, --thres echo "" echo "Running Monte Carlo docking" echo "" -python3 ../../scripts/dock.py structure.cif DEB+.xyz -d mcsuccess -s random -f min_catan_distance -o mcdocked --threshold_catan 3.1 --threshold 1.5 --attempts 20000 --cation_indexes 225 --acid_sites 84,92,90,96 +python3 ../../scripts/dock.py structure.cif DEB+.xyz -d mcsuccess -s random -f min_catan_distance -o
mcdocked --threshold_catan 3.1 --threshold 1.5 --attempts 20000 --cation_indexes 225 --acid_sites 76,84,92,96 echo "Final pose save to mcdocked folder" From 92b69a6ae4d9391344e69258145af72908a6a2da Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Mon, 18 Nov 2024 17:41:28 -0500 Subject: [PATCH 12/13] reescale function now receives an input complex as cpx, makes a copy from it, complex = cpx.copy() and works with the copied object from then onwards --- VOID/dockers/success.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/VOID/dockers/success.py b/VOID/dockers/success.py index 53fe41e..a4eff7b 100644 --- a/VOID/dockers/success.py +++ b/VOID/dockers/success.py @@ -19,9 +19,7 @@ def dock_at_point(self, point, attempts): hcoords = self.translate_host(point) for trial in range(attempts): - cpx = self.create_new_complex( - host_coords=hcoords, guest_coords=self.rotate_guest() - ) + cpx = self.create_new_complex(host_coords=hcoords, guest_coords=self.rotate_guest()) if self.fitness(cpx) >= 0: print(f"{trial + 1} attempts to success") @@ -32,9 +30,7 @@ def dock_at_point(self, point, attempts): class SuccessMonteCarloDocker(MonteCarloDocker): PARSER_NAME = "mcsuccess" - HELP = ( - "Docks guests to host until a successful docking is found (Monte Carlo version)" - ) + HELP = "Docks guests to host until a successful docking is found (Monte Carlo version)" def dock(self, attempts): cpx = Complex(self.host.copy(), self.guest.copy()) @@ -49,7 +45,7 @@ def dock(self, attempts): return [] - def rescale(self, complex): + def rescale(self, cpx): """Rescale the complex to the 0-1 range so results can be visualized in direct and xyz format. Args: @@ -58,6 +54,7 @@ def rescale(self, complex): Returns: Complex: The rescaled host-guest complex object. 
""" + complex = cpx.copy() lattice = complex.pose.lattice frac_coords = [] species_list = [] @@ -66,18 +63,12 @@ def rescale(self, complex): for site in complex.pose: site_labels.append(site.label) species_list.append(site.species) - coords = ( - site.frac_coords - if site.label == "host" - else np.mod(site.frac_coords, 1.0) - ) + coords = site.frac_coords if site.label == "host" else np.mod(site.frac_coords, 1.0) frac_coords.append(coords) site_properties = {"label": site_labels} - updated_structure = Structure( - lattice, species_list, frac_coords, site_properties=site_properties - ) + updated_structure = Structure(lattice, species_list, frac_coords, site_properties=site_properties) num_host_atoms = len(complex.host) From 3e687bca19e0e2a5d65089cc4aafddfea88297be Mon Sep 17 00:00:00 2001 From: Pau Ferri-Vicedo Date: Mon, 18 Nov 2024 17:43:56 -0500 Subject: [PATCH 13/13] added **kwargs parsing on the __init__ for MinDistanceCationAnionFitness class and improved the type on the argument parsing for --cation_indexes and --acid_sites variables so they work with both htvs and command line input formats --- VOID/fitness/threshold.py | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/VOID/fitness/threshold.py b/VOID/fitness/threshold.py index a348d09..815acbd 100644 --- a/VOID/fitness/threshold.py +++ b/VOID/fitness/threshold.py @@ -1,10 +1,9 @@ import numpy as np import argparse from .base import Fitness -import ipdb THRESHOLD = 1.5 -THRESHOLD_CATAN = 2.0 +THRESHOLD_CATAN = 3.5 DEFAULT_STRUCTURE = "complex" STRUCTURE_CHOICES = ["complex", "guest", "host"] DEFAULT_STEP = False @@ -31,11 +30,10 @@ def __init__( super().__init__() self.threshold = threshold self.step = step + self.extra_args = kwargs if structure not in STRUCTURE_CHOICES: - raise ValueError( - "structure has to be one of: {}".format(", ".join(STRUCTURE_CHOICES)) - ) + raise ValueError("structure has to be one of: {}".format(", ".join(STRUCTURE_CHOICES))) 
self.structure = structure @staticmethod @@ -84,11 +82,7 @@ def get_cation_anion_distances(self, acid_sites, cation_indexes, distance_matric distances_cation_anion = [] for cation in cation_indexes: - distances = [ - distance_matrices[cation][anion_index] - for anion_list in acid_sites - for anion_index in anion_list - ] + distances = [distance_matrices[cation][anion_index] for anion_list in acid_sites for anion_index in anion_list] distances_cation_anion.append(distances) return distances_cation_anion @@ -117,8 +111,9 @@ def __init__( structure=DEFAULT_STRUCTURE, cation_indexes=None, acid_sites=None, + **kwargs, ): - super().__init__(threshold) + super().__init__(threshold, structure, **kwargs) self.threshold_catan = threshold_catan self.cation_indexes = cation_indexes self.acid_sites = acid_sites @@ -135,13 +130,13 @@ def add_arguments(parser): ) parser.add_argument( "--cation_indexes", - type=list, + type=lambda x: [int(i) for i in x.split(",")], help="indexes for the atoms holding a positive charge in the molecule (default: %(default)s)", default=CATION_INDEXES, ) parser.add_argument( "--acid_sites", - type=list, + type=lambda x: [list(map(int, group.split(","))) for group in x.split(";")], help="list of indexes for the O atoms that hold a negative charge (default: %(default)s)", default=ACID_SITES, ) @@ -163,11 +158,7 @@ def __call__(self, complex): ) if ( - any( - distance < self.threshold_catan - for distance_list in cation_anion_distances - for distance in distance_list - ) + any(distance < self.threshold_catan for distance_list in cation_anion_distances for distance in distance_list) and self.normalize(self.get_distances(complex).min() - self.threshold) > 0 ): print("Optimal cation-anion distance found! 
Aborting the run") @@ -182,9 +173,7 @@ class MeanDistanceFitness(ThresholdFitness): HELP = "Complexes have positive score if the mean distance between host and guest is above the given threshold" def __call__(self, complex, axis=-1): - return self.normalize( - self.get_distances(complex).min(axis=axis).mean() - self.threshold - ) + return self.normalize(self.get_distances(complex).min(axis=axis).mean() - self.threshold) class SumInvDistanceFitness(ThresholdFitness):