diff --git a/HISTORY.txt b/HISTORY.txt index bc675609..c910f6bb 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -355,3 +355,4 @@ 03-Apr-2024 V1.717 Add int_list cifType to DataTypeApplicationInfo 09-Apr-2024 V1.718 Update RepoLoadExec CLI and RepoLoadWorkflow to support CLI usage from weekly-update workflow 6-May-2024 V1.719 Updates to CLI utilities + 9-May-2024 V1.720 Adjust provider type exclusion input to accept a list of types; update setuptools config diff --git a/README.md b/README.md index f4bd13bc..ed48134c 100644 --- a/README.md +++ b/README.md @@ -247,7 +247,7 @@ optional arguments: --force_reload Force re-load of provided ID list (i.e., don't just load delta; useful for manual/test runs). - --provider_type_exclude PROVIDER_TYPE_EXCLUDE + --provider_types_exclude PROVIDER_TYPES_EXCLUDE Resource provider types to exclude --db_type DB_TYPE Database server type (default=mongo) --file_limit FILE_LIMIT diff --git a/rcsb/db/cli/RepoLoadExec.py b/rcsb/db/cli/RepoLoadExec.py index de23615b..2a46048d 100644 --- a/rcsb/db/cli/RepoLoadExec.py +++ b/rcsb/db/cli/RepoLoadExec.py @@ -93,7 +93,7 @@ def main(): parser.add_argument("--load_file_list_dir", default=None, help="Directory path for storing load file lists") parser.add_argument("--num_sublists", default=None, help="Number of sublists to create/load for the associated database") parser.add_argument("--force_reload", default=False, action="store_true", help="Force re-load of provided ID list (i.e., don't just load delta; useful for manual/test runs).") - parser.add_argument("--provider_type_exclude", default=None, help="Resource provider types to exclude") + parser.add_argument("--provider_types_exclude", default=None, help="Resource provider types to exclude") # parser.add_argument("--db_type", default="mongo", help="Database server type (default=mongo)") parser.add_argument("--file_limit", default=None, help="Load file limit for testing") @@ -130,7 +130,7 @@ def main(): okR = rlWf.load(op, **loadD) # elif op == "build_resource_cache": - 
okR = rlWf.buildResourceCache(rebuildCache=True, providerTypeExclude=loadD["providerTypeExclude"]) + okR = rlWf.buildResourceCache(rebuildCache=True, providerTypeExcludeL=loadD["providerTypeExcludeL"]) # elif op == "pdbx_id_list_splitter": okR = rlWf.splitIdList(op, **loadD) @@ -249,7 +249,7 @@ def processArguments(args): "documentStyle": args.document_style, "dataSelectors": dataSelectors, "mergeValidationReports": not args.disable_merge_validation_reports, - "providerTypeExclude": args.provider_type_exclude, + "providerTypeExcludeL": args.provider_types_exclude, "clusterFileNameTemplate": args.cluster_filename_template, "rebuildCache": args.rebuild_cache, "forceReload": args.force_reload, diff --git a/rcsb/db/cli/__init__.py b/rcsb/db/cli/__init__.py index 697de798..ba7b7e16 100644 --- a/rcsb/db/cli/__init__.py +++ b/rcsb/db/cli/__init__.py @@ -2,4 +2,4 @@ __author__ = "John Westbrook" __email__ = "john.westbrook@rcsb.org" __license__ = "Apache 2.0" -__version__ = "1.719" +__version__ = "1.720" diff --git a/rcsb/db/mongo/PdbxLoader.py b/rcsb/db/mongo/PdbxLoader.py index 2535267d..ef7797cc 100644 --- a/rcsb/db/mongo/PdbxLoader.py +++ b/rcsb/db/mongo/PdbxLoader.py @@ -40,6 +40,7 @@ # containers fails to be read properly (incl. 
validation reports); # Begin adding code to support weekly update workflow CLI requirements # 26-Mar-2024 dwp Add arguments and logic to support CLI usage from weekly-update workflow +# 9-May-2024 dwp Change providerTypeExclude to be a list, 'providerTypeExcludeL' # ## """ @@ -159,7 +160,7 @@ def load( validateFailures=True, rebuildCache=False, reloadPartial=True, - providerTypeExclude=None, + providerTypeExcludeL=None, restoreUseGit=True, restoreUseStash=True, forceReload=False, @@ -186,7 +187,7 @@ def load( validateFailures (bool, optional): output validation report on load failures rebuildCache (bool, optional): whether to force rebuild of all cache resources (default is False, to just check them) reloadPartial (bool, optional): on load failures attempt reload of partial objects. - providerTypeExclude (str, optional): exclude dictionary method provider by type name. Defaults to None. + providerTypeExcludeL (list, optional): exclude dictionary method providers by type name. Defaults to None. restoreUseStash (bool, optional): restore cache resources using stash storage. Defaults to True. restoreUseGit (bool, optional): restore cache resources using git storage. Defaults to True. 
forceReload (bool, optional): Force re-load of provided ID list (i.e., don't just load delta; useful for manual/test runs) @@ -241,18 +242,19 @@ def load( logger.info("Saving %d paths in %s", len(locatorObjList), saveInputFileListPath) # --- # Don't load resource providers which are irrelevant to 'pdbx_core' or 'pdbx_comp_model_core' - if not providerTypeExclude: - if databaseName == "pdbx_core": - providerTypeExclude = "pdbx_comp_model_core" - if databaseName == "pdbx_comp_model_core": - providerTypeExclude = "pdbx_core" + if not providerTypeExcludeL: + providerTypeExcludeL = [] + if databaseName in ["pdbx_core", "bird_chem_comp_core"]: + providerTypeExcludeL.append("pdbx_comp_model_core") + if databaseName in ["pdbx_comp_model_core", "bird_chem_comp_core"]: + providerTypeExcludeL.append("pdbx_core") # modulePathMap = self.__cfgOb.get("DICT_METHOD_HELPER_MODULE_PATH_MAP", sectionName=self.__cfgSectionName) dP = DictionaryApiProviderWrapper(self.__cachePath, cfgOb=self.__cfgOb, useCache=True) dictApi = dP.getApiByName(databaseName) # --- dmrP = DictMethodResourceProvider( - self.__cfgOb, cachePath=self.__cachePath, restoreUseStash=restoreUseStash, restoreUseGit=restoreUseGit, providerTypeExclude=providerTypeExclude + self.__cfgOb, cachePath=self.__cachePath, restoreUseStash=restoreUseStash, restoreUseGit=restoreUseGit, providerTypeExcludeL=providerTypeExcludeL ) # Cache dependencies in serial mode. 
useCacheInCheck = not rebuildCache diff --git a/rcsb/db/mysql/SchemaDefLoader.py b/rcsb/db/mysql/SchemaDefLoader.py index bc25425e..f8f0219f 100644 --- a/rcsb/db/mysql/SchemaDefLoader.py +++ b/rcsb/db/mysql/SchemaDefLoader.py @@ -72,7 +72,7 @@ def __init__( verbose=True, restoreUseStash=True, restoreUseGit=True, - providerTypeExclude=True, + providerTypeExcludeL=None, ): self.__verbose = verbose self.__debug = False @@ -103,7 +103,12 @@ def __init__( modulePathMap = self.__cfgOb.get("DICT_METHOD_HELPER_MODULE_PATH_MAP", sectionName=sectionName) dP = DictionaryApiProviderWrapper(self.__cachePath, cfgOb=self.__cfgOb, configName=sectionName, useCache=True) dictApi = dP.getApiByName(schemaName) - rP = DictMethodResourceProvider(self.__cfgOb, cachePath=self.__cachePath, restoreUseStash=restoreUseStash, restoreUseGit=restoreUseGit, providerTypeExclude=providerTypeExclude) + rP = DictMethodResourceProvider( + self.__cfgOb, cachePath=self.__cachePath, + restoreUseStash=restoreUseStash, + restoreUseGit=restoreUseGit, + providerTypeExcludeL=providerTypeExcludeL, + ) self.__dmh = DictMethodRunner(dictApi, modulePathMap=modulePathMap, resourceProvider=rP) def setWarning(self, action): diff --git a/rcsb/db/tests-mongo/testPdbxLoader.py b/rcsb/db/tests-mongo/testPdbxLoader.py index c321e691..e0345d47 100644 --- a/rcsb/db/tests-mongo/testPdbxLoader.py +++ b/rcsb/db/tests-mongo/testPdbxLoader.py @@ -57,7 +57,7 @@ def setUp(self): # # self.__isMac = platform.system() == "Darwin" - self.__excludeType = None if self.__isMac else "optional" + self.__excludeTypeL = None if self.__isMac else ["optional"] self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml") configName = "site_info_configuration" @@ -169,7 +169,7 @@ def __pdbxLoaderWrapper(self, **kwargs): updateSchemaOnReplace=kwargs["updateSchemaOnReplace"], restoreUseStash=False, restoreUseGit=True, - providerTypeExclude=self.__excludeType, + 
providerTypeExcludeL=self.__excludeTypeL, ) self.assertEqual(ok, kwargs["status"]) ok = self.__loadStatus(mw.getLoadStatus()) diff --git a/rcsb/db/tests-mongo/testPdbxLoaderRemote.py b/rcsb/db/tests-mongo/testPdbxLoaderRemote.py index 593e200c..f46503de 100644 --- a/rcsb/db/tests-mongo/testPdbxLoaderRemote.py +++ b/rcsb/db/tests-mongo/testPdbxLoaderRemote.py @@ -55,7 +55,7 @@ def setUp(self): # # self.__isMac = platform.system() == "Darwin" - self.__excludeType = None if self.__isMac else "optional" + self.__excludeTypeL = None if self.__isMac else ["optional"] mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml") configName = "site_info_configuration" @@ -157,7 +157,7 @@ def __pdbxLoaderWrapper(self, **kwargs): updateSchemaOnReplace=kwargs["updateSchemaOnReplace"], restoreUseStash=False, restoreUseGit=True, - providerTypeExclude=self.__excludeType, + providerTypeExcludeL=self.__excludeTypeL, ) self.assertEqual(ok, kwargs["status"]) ok = self.__loadStatus(mw.getLoadStatus()) diff --git a/rcsb/db/tests-mysql/testSchemaDefLoaderDb.py b/rcsb/db/tests-mysql/testSchemaDefLoaderDb.py index bfb83fbf..538b52b0 100644 --- a/rcsb/db/tests-mysql/testSchemaDefLoaderDb.py +++ b/rcsb/db/tests-mysql/testSchemaDefLoaderDb.py @@ -49,7 +49,7 @@ def __init__(self, methodName="runTest"): def setUp(self): self.__isMac = platform.system() == "Darwin" - self.__excludeType = None if self.__isMac else "optional" + self.__excludeTypeL = None if self.__isMac else ["optional"] self.__verbose = True # fileLimit = 100 @@ -134,7 +134,7 @@ def testLoadBirdReference(self): verbose=self.__verbose, restoreUseStash=False, restoreUseGit=True, - providerTypeExclude=self.__excludeType, + providerTypeExcludeL=self.__excludeTypeL, ) ok = sdl.load(inputPathList=inputPathList, loadType="batch-file") self.assertTrue(ok) @@ -163,7 +163,7 @@ def testReLoadBirdReference(self): verbose=self.__verbose, restoreUseStash=False, 
restoreUseGit=True, - providerTypeExclude=self.__excludeType, + providerTypeExcludeL=self.__excludeTypeL, ) sdl.load(inputPathList=inputPathList, loadType="batch-file") # @@ -197,7 +197,7 @@ def testLoadChemCompReference(self): verbose=self.__verbose, restoreUseStash=False, restoreUseGit=True, - providerTypeExclude=self.__excludeType, + providerTypeExcludeL=self.__excludeTypeL, ) ok = sdl.load(inputPathList=inputPathList, loadType="batch-file") self.assertTrue(ok) @@ -226,7 +226,7 @@ def testLoadPdbxFiles(self): verbose=self.__verbose, restoreUseStash=False, restoreUseGit=True, - providerTypeExclude=self.__excludeType, + providerTypeExcludeL=self.__excludeTypeL, ) ok = sdl.load(inputPathList=inputPathList, loadType="batch-insert", deleteOpt="all") self.assertTrue(ok) diff --git a/rcsb/db/tests-validate/testSchemaDataPrepValidate.py b/rcsb/db/tests-validate/testSchemaDataPrepValidate.py index c10e082c..bfaafe39 100644 --- a/rcsb/db/tests-validate/testSchemaDataPrepValidate.py +++ b/rcsb/db/tests-validate/testSchemaDataPrepValidate.py @@ -59,7 +59,7 @@ def chunkList(seq, size): class SchemaDataPrepValidateTests(unittest.TestCase): def setUp(self): self.__isMac = platform.system() == "Darwin" - self.__excludeType = None if self.__isMac else "optional" + self.__excludeTypeL = None if self.__isMac else ["optional"] self.__numProc = 2 # self.__fileLimit = None self.__fileLimit = 20 @@ -270,7 +270,7 @@ def __testPrepDocumentsFromContainers(self, inputPathList, databaseName, collect cachePath=self.__cachePath, restoreUseStash=False, restoreUseGit=True, - providerTypeExclude=self.__excludeType, + providerTypeExcludeL=self.__excludeTypeL, ) dmh = DictMethodRunner(dictApi, modulePathMap=self.__modulePathMap, resourceProvider=rP) # diff --git a/rcsb/db/tests/fixtureDictMethodResourceProvider.py b/rcsb/db/tests/fixtureDictMethodResourceProvider.py index 956bdc42..e8caf49e 100644 --- a/rcsb/db/tests/fixtureDictMethodResourceProvider.py +++ 
b/rcsb/db/tests/fixtureDictMethodResourceProvider.py @@ -37,7 +37,7 @@ class DictMethodResourceProviderFixture(unittest.TestCase): def setUp(self): self.__isMac = platform.system() == "Darwin" - self.__excludeType = None if self.__isMac else "optional" + self.__excludeTypeL = None if self.__isMac else ["optional"] self.__cachePath = os.path.join(TOPDIR, "CACHE") self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml") @@ -64,7 +64,7 @@ def testRecoverResourceCache(self): cachePath=self.__cachePath, restoreUseGit=True, restoreUseStash=False, - providerTypeExclude=self.__excludeType, + providerTypeExcludeL=self.__excludeTypeL, ) ret = rp.cacheResources(useCache=True) self.assertTrue(ret) diff --git a/rcsb/db/tests/testSchemaDefDataPrep.py b/rcsb/db/tests/testSchemaDefDataPrep.py index a738357c..313ca0f3 100644 --- a/rcsb/db/tests/testSchemaDefDataPrep.py +++ b/rcsb/db/tests/testSchemaDefDataPrep.py @@ -67,7 +67,7 @@ def __init__(self, methodName="runTest"): def setUp(self): self.__isMac = platform.system() == "Darwin" - self.__excludeType = None if self.__isMac else "optional" + self.__excludeTypeL = None if self.__isMac else ["optional"] self.__numProc = 2 mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") self.__cachePath = os.path.join(TOPDIR, "CACHE") @@ -311,7 +311,7 @@ def __fullSchemaDataPrep(self, contentType, filterType, styleType, mockLength, r cachePath=self.__cachePath, restoreUseStash=False, restoreUseGit=True, - providerTypeExclude=self.__excludeType, + providerTypeExcludeL=self.__excludeTypeL, ) dmh = DictMethodRunner(dictApi, modulePathMap=self.__modulePathMap, resourceProvider=rP) # diff --git a/rcsb/db/wf/RepoLoadWorkflow.py b/rcsb/db/wf/RepoLoadWorkflow.py index 4d578a99..ffc9a93d 100644 --- a/rcsb/db/wf/RepoLoadWorkflow.py +++ b/rcsb/db/wf/RepoLoadWorkflow.py @@ -89,7 +89,7 @@ def load(self, op, **kwargs): pruneDocumentSize = 
kwargs.get("pruneDocumentSize", None) pruneDocumentSize = float(pruneDocumentSize) if pruneDocumentSize else None regexPurge = kwargs.get("regexPurge", False) - providerTypeExclude = kwargs.get("providerTypeExclude", None) + providerTypeExcludeL = kwargs.get("providerTypeExcludeL", None) clusterFileNameTemplate = kwargs.get("clusterFileNameTemplate", None) # # "Document organization (rowwise_by_name_with_cardinality|rowwise_by_name|columnwise_by_name|rowwise_by_id|rowwise_no_name", @@ -159,7 +159,7 @@ def load(self, op, **kwargs): regexPurge=regexPurge, validationLevel=schemaLevel, mergeContentTypes=mergeContentTypes, - providerTypeExclude=providerTypeExclude, + providerTypeExcludeL=providerTypeExcludeL, updateSchemaOnReplace=updateSchemaOnReplace, rebuildCache=rebuildCache, forceReload=forceReload, @@ -213,7 +213,7 @@ def loadStatus(self, statusList, readBackCheck=True): logger.exception("Failing with %s", str(e)) return ret - def buildResourceCache(self, rebuildCache=False, providerTypeExclude=None, restoreUseStash=True, restoreUseGit=True): + def buildResourceCache(self, rebuildCache=False, providerTypeExcludeL=None, restoreUseStash=True, restoreUseGit=True): """Generate and cache resource dependencies.""" ret = False try: @@ -232,7 +232,7 @@ def buildResourceCache(self, rebuildCache=False, providerTypeExclude=None, resto cachePath=self.__cachePath, restoreUseStash=restoreUseStash, restoreUseGit=restoreUseGit, - providerTypeExclude=providerTypeExclude, + providerTypeExcludeL=providerTypeExcludeL, ) ret = rP.cacheResources(useCache=useCache, doBackup=False, useStash=False, useGit=False) logger.info("useCache %r cache reload status (%r)", useCache, ret) diff --git a/setup.cfg b/setup.cfg index 53bb53db..360a483e 100755 --- a/setup.cfg +++ b/setup.cfg @@ -3,5 +3,5 @@ universal=1 [metadata] -description-file = README.md +description_file = README.md