From feedfde502ba65359677e7bce6a29d60fd2bee4a Mon Sep 17 00:00:00 2001 From: Douglas Myers-Turnbull Date: Wed, 5 Feb 2025 14:36:01 -0800 Subject: [PATCH] drop: support for unused databases --- README.md | 19 +- azure-pipelines.yml | 8 +- azure-template-tox-job.yml | 44 +- pylintrc | 10 - rcsb/db/cockroach/CockroachDbLoader.py | 237 ---- rcsb/db/cockroach/CockroachDbUtil.py | 274 ----- rcsb/db/cockroach/Connection.py | 160 --- rcsb/db/cockroach/__init__.py | 0 rcsb/db/config/exdb-config-example.yml | 27 - rcsb/db/crate/Connection.py | 155 --- rcsb/db/crate/CrateDbLoader.py | 202 ---- rcsb/db/crate/CrateDbUtil.py | 278 ----- rcsb/db/crate/__init__.py | 0 rcsb/db/helpers/r.py | 53 - rcsb/db/mysql/Connection.py | 89 -- rcsb/db/mysql/ConnectionBase.py | 139 --- rcsb/db/mysql/MyDbAdapter.py | 454 -------- rcsb/db/mysql/MyDbUtil.py | 316 ----- rcsb/db/mysql/MysqlSchemaImporter.py | 151 --- rcsb/db/mysql/SchemaDefLoader.py | 461 -------- rcsb/db/mysql/__init__.py | 0 rcsb/db/sql/QueryDirectives.py | 513 --------- rcsb/db/sql/SqlGen.py | 1018 ----------------- rcsb/db/sql/__init__.py | 0 rcsb/db/tests-cockroach/ConnectionTests.py | 78 -- .../SchemaDefLoaderCockroachDbMultiTests.py | 282 ----- .../SchemaDefLoaderCockroachDbTests.py | 354 ------ rcsb/db/tests-cockroach/__init__.py | 0 .../tests-crate/testSchemaDefLoaderCrateDb.py | 325 ------ .../testSchemaDefLoaderCrateDbMulti.py | 291 ----- rcsb/db/tests-mysql/__init__.py | 0 rcsb/db/tests-mysql/test-output/.gitkeep | 0 rcsb/db/tests-mysql/testConnection.py | 181 --- rcsb/db/tests-mysql/testSchemaDefLoaderDb.py | 257 ----- rcsb/db/tests/testLoggerStream.py | 28 - rcsb/db/tests/testSqlGen.py | 158 --- rcsb/db/utils/SchemaProvider.py | 2 - rcsb/db/utils/makePathList.py | 49 - rcsb/db/utils/unescape.py | 35 - requirements.txt | 13 - setup.py | 2 +- tox.ini | 7 +- 42 files changed, 21 insertions(+), 6649 deletions(-) delete mode 100644 rcsb/db/cockroach/CockroachDbLoader.py delete mode 100644 
rcsb/db/cockroach/CockroachDbUtil.py delete mode 100644 rcsb/db/cockroach/Connection.py delete mode 100644 rcsb/db/cockroach/__init__.py delete mode 100644 rcsb/db/crate/Connection.py delete mode 100644 rcsb/db/crate/CrateDbLoader.py delete mode 100644 rcsb/db/crate/CrateDbUtil.py delete mode 100644 rcsb/db/crate/__init__.py delete mode 100644 rcsb/db/helpers/r.py delete mode 100644 rcsb/db/mysql/Connection.py delete mode 100644 rcsb/db/mysql/ConnectionBase.py delete mode 100644 rcsb/db/mysql/MyDbAdapter.py delete mode 100644 rcsb/db/mysql/MyDbUtil.py delete mode 100644 rcsb/db/mysql/MysqlSchemaImporter.py delete mode 100644 rcsb/db/mysql/SchemaDefLoader.py delete mode 100644 rcsb/db/mysql/__init__.py delete mode 100644 rcsb/db/sql/QueryDirectives.py delete mode 100644 rcsb/db/sql/SqlGen.py delete mode 100644 rcsb/db/sql/__init__.py delete mode 100644 rcsb/db/tests-cockroach/ConnectionTests.py delete mode 100644 rcsb/db/tests-cockroach/SchemaDefLoaderCockroachDbMultiTests.py delete mode 100644 rcsb/db/tests-cockroach/SchemaDefLoaderCockroachDbTests.py delete mode 100644 rcsb/db/tests-cockroach/__init__.py delete mode 100644 rcsb/db/tests-crate/testSchemaDefLoaderCrateDb.py delete mode 100644 rcsb/db/tests-crate/testSchemaDefLoaderCrateDbMulti.py delete mode 100644 rcsb/db/tests-mysql/__init__.py delete mode 100644 rcsb/db/tests-mysql/test-output/.gitkeep delete mode 100644 rcsb/db/tests-mysql/testConnection.py delete mode 100644 rcsb/db/tests-mysql/testSchemaDefLoaderDb.py delete mode 100644 rcsb/db/tests/testLoggerStream.py delete mode 100644 rcsb/db/tests/testSqlGen.py delete mode 100644 rcsb/db/utils/makePathList.py delete mode 100644 rcsb/db/utils/unescape.py diff --git a/README.md b/README.md index a0580718..3bc38ec8 100644 --- a/README.md +++ b/README.md @@ -20,17 +20,12 @@ git clone --recurse-submodules https://github.com/rcsb/py-rcsb_db.git ``` -Optionally, run test suite (Python versions 3.9+) using 
-[setuptools](https://setuptools.readthedocs.io/en/latest/) or -[tox](http://tox.readthedocs.io/en/latest/example/platform.html): +Optionally, run test suite (Python versions 3.9+) using either: -```bash -python setup.py test - -or simply run - -tox -``` +- [setuptools](https://setuptools.readthedocs.io/en/latest/) + with `python setup.py test` +- [tox](http://tox.readthedocs.io/en/latest/example/platform.html) + by running simply `tox` Installation is via the program [pip](https://pypi.python.org/pypi/pip). To run tests from the source tree, the package must be installed in editable mode (i.e. -e): @@ -44,9 +39,7 @@ pip install -e . You will need a few packages, before `pip install .` can work: ```bash - -sudo apt install default-libmysqlclient-dev flex bison - +sudo apt install flex bison ``` ### Installing on macOS diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 97e5e3e3..cf623fe0 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -26,14 +26,14 @@ schedules: jobs: - template: azure-template-tox-job.yml - parameters: {tox: 'format_pep8', python: '3.10', os: 'linux', fixtures: 'mysql,mongodb'} + parameters: {tox: 'format_pep8', python: '3.10', os: 'linux', fixtures: 'mongodb'} - template: azure-template-tox-job.yml - parameters: {tox: 'lint_pylint', python: '3.10', os: 'linux', fixtures: 'mysql,mongodb'} + parameters: {tox: 'lint_pylint', python: '3.10', os: 'linux', fixtures: 'mongodb'} #- template: azure-template-tox-job.yml - # parameters: {tox: 'test_coverage', python: '3.10', os: 'linux', fixtures: 'mysql,mongodb'} + # parameters: {tox: 'test_coverage', python: '3.10', os: 'linux', fixtures: 'mongodb'} # - template: azure-template-tox-job.yml - parameters: {tox: 'py310', python: '3.10', os: 'linux', fixtures: 'mysql,mongodb'} + parameters: {tox: 'py310', python: '3.10', os: 'linux', fixtures: 'mongodb'} - template: azure-template-publish-job.yml parameters: {tox: 'py310', python: '3.10', os: 'linux'} # diff --git 
a/azure-template-tox-job.yml b/azure-template-tox-job.yml index 2090c24d..a63cc62d 100644 --- a/azure-template-tox-job.yml +++ b/azure-template-tox-job.yml @@ -1,6 +1,6 @@ # File: azure-template-tox-job.yml # Date: 8-Jul-2019 jdw split out from original pipeline -# Supports: fixtures=mysql,mongodb (linux) +# Supports: fixtures=mongodb (linux) # # Updates: # 6-Aug-2019 jdw build source and binary wheels by default. @@ -63,7 +63,7 @@ jobs: # ---------------------------------------------- - ${{ if startsWith(parameters.os, 'linux') }}: - script: lsb_release -a - displayName: "Ubuntu kernal version" + displayName: "Ubuntu kernel version" - script: which apt displayName: "Installing OS dependencies" - script: apt-cache policy | grep http | awk '{print $2 $3}' | sort -u @@ -80,39 +80,6 @@ jobs: - script: sudo apt-get install bison displayName: "Install bison" # - - ? ${{ if and(contains(parameters.fixtures, 'mysql'), startsWith(parameters.os, 'linux')) }} - : - bash: | - sudo apt-get install python3-dev mysql-server - sudo apt-get install default-libmysqlclient-dev - sudo apt-get install python-mysqldb - sudo apt list --installed | grep -i mysql - displayName: "Install mysql development libraries" - - bash: | - echo "Retarting mysql service" - sudo systemctl restart mysql.service - mysql -V - mysql --user=root --password=root -e "use mysql; select * from user;" - # - echo "Try resetting password" - mysqladmin --user=root --password=root password 'ChangeMeSoon' - # - # mysql -u root -p root -e "SET PASSWORD FOR root@'localhost' = PASSWORD(‘ChangeMeSoon’);" - # mysql -u root -p root -e "FLUSH PRIVILEGES; update mysql.user set password=password('ChangeMeSoon') where user='root'; FLUSH PRIVILEGES;" - # UPDATE mysql.user SET Password=PASSWORD('ChangeMeSoon') WHERE User='root'; - - echo "Running preliminary mysql setup" - mysql --user=root --password=ChangeMeSoon <<_EOF_ - DELETE FROM mysql.user WHERE User=''; - DELETE FROM mysql.user WHERE User='root' AND Host NOT IN 
('localhost', '127.0.0.1', '::1'); - DROP DATABASE IF EXISTS test; - DELETE FROM mysql.db WHERE Db='test' OR Db='test\\_%'; - FLUSH PRIVILEGES; - _EOF_ - ps -ef | grep -i my - mysql --user=root --password=ChangeMeSoon -e "show databases;" - # - displayName: "Start and configure mysql ..." - # ----- - ? ${{ if and(contains(parameters.fixtures, 'mongodb'), startsWith(parameters.os, 'linux')) }} : # Mongo install @@ -137,8 +104,11 @@ jobs: - script: 'python -c "import sys; print(sys.version); print(sys.executable)"' displayName: show python information # - - script: python -m pip install --upgrade pip tox - displayName: "Install tools" + - script: python -m pip install --upgrade pip + displayName: "Upgrade pip" + # + - script: pip install tox jsonschema 'rcsb.utils.chemref>=0.91' 'jsondiff>=1.2.0' + displayName: "Install test requirements" # - script: pip install -r requirements.txt displayName: "Install dependencies" diff --git a/pylintrc b/pylintrc index bfb3a90e..ca1a1d96 100644 --- a/pylintrc +++ b/pylintrc @@ -1,10 +1,5 @@ [MASTER] -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. -extension-pkg-whitelist=MySQLdb - # Add files or directories to the blacklist. They should be base names, not # paths. ignore=CVS @@ -302,11 +297,6 @@ ignored-argument-names=_.*|^ignored_|^unused_ # Tells whether we should check for unused import in __init__ files. init-import=no -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io - - [FORMAT] # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 
diff --git a/rcsb/db/cockroach/CockroachDbLoader.py b/rcsb/db/cockroach/CockroachDbLoader.py deleted file mode 100644 index 07d023d2..00000000 --- a/rcsb/db/cockroach/CockroachDbLoader.py +++ /dev/null @@ -1,237 +0,0 @@ -## -# File: CockroachDbLoader.py -# Author: J. Westbrook -# Date: 1-Apr-2018 -# Version: 0.001 Initial version -# -# Loader variant to support stripped down support for Cockroach DB. -# -# Updates: -# -# 31-Mar-2019 jdw add more speific tests for null value suggested by -# issue = MySQL SchemaDefLoader skip zero values #19 -## -""" -Generic mapper of PDBx/mmCIF instance data to SQL loadable data files based on external -schema definition defined in class SchemaDefBase(). -""" - -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - -import logging -import time - -from rcsb.db.cockroach.CockroachDbUtil import CockroachDbQuery -from rcsb.db.processors.DataTransformFactory import DataTransformFactory -from rcsb.db.processors.SchemaDefDataPrep import SchemaDefDataPrep -from rcsb.db.sql.SqlGen import SqlGenAdmin - -logger = logging.getLogger(__name__) - - -class CockroachDbLoader(object): - - """Map PDBx/mmCIF instance data to SQL loadable data using external schema definition.""" - - def __init__(self, schemaDefObj, ioObj=None, dbCon=None, workPath=".", cleanUp=False, warnings="default", verbose=True): - """Map PDBx/mmCIF instance data to SQL loadable data using external schema definition. 
- - Args: - schemaDefObj (object): Description - ioObj (None, optional): Description - dbCon (None, optional): Description - workPath (str, optional): Description - cleanUp (bool, optional): Description - warnings (str, optional): Description - verbose (bool, optional): Description - """ - self.__verbose = verbose - self.__debug = False - self.__sD = schemaDefObj - self.__ioObj = ioObj - # - self.__dbCon = dbCon - self.__workingPath = workPath - self.__pathList = [] - self.__cleanUp = cleanUp - # - # self.__sdp = SchemaDefDataPrep(schemaDefObj=schemaDefObj, ioObj=IoAdapter(), verbose=True) - # - self.__warningAction = warnings - self.__fTypeRow = "skip-max-width" - dtf = DataTransformFactory(schemaDefAccessObj=self.__sD, filterType=self.__fTypeRow) - self.__sdp = SchemaDefDataPrep(schemaDefAccessObj=self.__sD, dtObj=dtf, workPath=self.__workingPath, verbose=self.__verbose) - # - - def load(self, inputPathList=None, containerList=None, loadType="batch-file", deleteOpt=None, tableIdSkipD=None): - """Load data for each table defined in the current schema definition object. - Data are extracted from the input file or container list. - - Data source options: - inputPathList = [, ....] - - or - - containerList = [ data container, ...] - - - loadType = ['cockroack-insert' | 'cockroach-insert-many'] - deleteOpt = 'selected' | 'all' - - tableIdSkipD - searchable container with tableIds to be skipped on loading - - - Loading is performed using the current database server connection. - - Intermediate data files for 'batch-file' loading are created in the current working path. - - Returns True for success or False otherwise. 
- - Args: - inputPathList (list, optional): Description - containerList (list, optional): Description - loadType (str, optional): Description - deleteOpt (None, optional): Description - tableIdSkipD (None, optional): Description - - Returns: - TYPE: Description - - """ - tableIdSkipD = tableIdSkipD if tableIdSkipD is not None else {} - tableDataDict = {} - containerNameList = [] - if inputPathList is not None: - tableDataDict, containerNameList = self.__sdp.fetch(inputPathList) - elif containerList is not None: - tableDataDict, containerNameList = self.__sdp.process(containerList) - # - if loadType in ["cockroach-insert", "cockroach-insert-many"]: - sqlMode = "single" - if loadType in ["cockroach-insert-many"]: - sqlMode = "many" - for tableId, rowList in tableDataDict.items(): - if tableId in tableIdSkipD: - continue - if deleteOpt in ["all", "truncate", "selected"] or rowList: - self.__cockroachInsertImport(tableId, rowList=rowList, containerNameList=containerNameList, deleteOpt=deleteOpt, sqlMode=sqlMode) - return True - else: - pass - - return False - - def __cockroachInsertImport(self, tableId, rowList=None, containerNameList=None, deleteOpt="selected", sqlMode="many"): - """Load the input table using sql cockroach templated inserts of the input rowlist of dictionaries (i.e. d[attributeId]=value). - - The containerNameList corresponding to the data within loadable data in rowList can be provided - if 'selected' deletions are to performed prior to the the batch data inserts. - - deleteOpt = ['selected','all'] where 'selected' deletes rows corresponding to the input container - list before insert. The 'all' options truncates the table prior to insert. - - Deletions are performed in the absence of loadable data. 
- - Args: - tableId (TYPE): Description - rowList (None, optional): Description - containerNameList (None, optional): Description - deleteOpt (str, optional): Description - sqlMode (str, optional): Description - - Returns: - TYPE: Description - - """ - startTime = time.time() - crQ = CockroachDbQuery(dbcon=self.__dbCon, verbose=self.__verbose) - sqlGen = SqlGenAdmin(self.__verbose) - # - databaseName = self.__sD.getVersionedDatabaseName() - tableDefObj = self.__sD.getTable(tableId) - tableName = tableDefObj.getName() - tableAttributeIdList = tableDefObj.getAttributeIdList() - tableAttributeNameList = tableDefObj.getAttributeNameList() - # - sqlDeleteList = None - if deleteOpt in ["selected", "delete"] and containerNameList is not None: - deleteAttributeName = tableDefObj.getDeleteAttributeName() - logger.debug("tableName %s delete attribute %s", tableName, deleteAttributeName) - sqlDeleteList = sqlGen.deleteFromListSQL(databaseName, tableName, deleteAttributeName, containerNameList, chunkSize=10) - # logger.debug("Delete SQL for %s : %r" % (tableId, sqlDeleteList)) - elif deleteOpt in ["all", "truncate"]: - sqlDeleteList = [sqlGen.truncateTableSQL(databaseName, tableName)] - # - lenC = len(rowList) - logger.debug("Deleting from table %s length %d", tableName, lenC) - crQ.sqlCommandList(sqlDeleteList) - endTime1 = time.time() - logger.debug("Deleting succeeds for table %s %d rows at %s (%.3f seconds)", tableName, lenC, time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime1 - startTime) - logger.debug("Delete commands %s", sqlDeleteList) - - if not rowList: - logger.debug("Skipping insert for table %s length %d", tableName, len(containerNameList)) - return True - # - logger.debug("Insert begins for table %s with row length %d", tableName, len(rowList)) - sqlInsertList = [] - tupL = list(zip(tableAttributeIdList, tableAttributeNameList)) - if sqlMode == "many": - aList = [] - for tId, nm in tupL: - aList.append(tId) - # - vLists = [] - for row in rowList: - 
vList = [] - for tId, nm in tupL: - if row[tId] and row[tId] != r"\N": - vList.append(row[tId]) - else: - vList.append(None) - vLists.append(vList) - # - ret = crQ.sqlTemplateCommandMany(sqlTemplate=sqlGen.idInsertTemplateSQL(databaseName, tableDefObj, aList), valueLists=vLists) - endTime = time.time() - if ret: - logger.debug("Insert succeeds for table %s %d rows at %s (%.3f seconds)", tableName, lenC, time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - endTime1) - else: - logger.error("Insert fails for table %s %d rows at %s (%.3f seconds)", tableName, lenC, time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - endTime1) - else: - lenT = -1 - lenR = -1 - aList = [] - for tId, nm in tupL: - aList.append(nm) - # - for row in rowList: - vList = [] - for tId, nm in tupL: - if row[tId] is not None and row[tId] != r"\N": - vList.append(row[tId]) - else: - vList.append(None) - sqlInsertList.append((sqlGen.insertTemplateSQL(databaseName, tableName, aList), vList)) - # - lenT = len(sqlInsertList) - lenR = crQ.sqlTemplateCommandList(sqlInsertList) - # - ret = lenR == lenT - endTime = time.time() - if ret: - logger.debug( - "Insert succeeds for table %s %d of %d rows at %s (%.3f seconds)", - tableName, - lenR, - lenT, - time.strftime("%Y %m %d %H:%M:%S", time.localtime()), - endTime - endTime1, - ) - else: - logger.error( - "Insert fails for table %s %d of %d rows at %s (%.3f seconds)", tableName, lenR, lenT, time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - endTime1 - ) - - return ret diff --git a/rcsb/db/cockroach/CockroachDbUtil.py b/rcsb/db/cockroach/CockroachDbUtil.py deleted file mode 100644 index b8243e64..00000000 --- a/rcsb/db/cockroach/CockroachDbUtil.py +++ /dev/null @@ -1,274 +0,0 @@ -## -# File: CockroachDbUtil.py -# Author: J. 
Westbrook -# Date: 10-Feb-2018 -# Version: 0.001 Initial version -# -# Updates: -# -## -""" -Utility classes to create connections and process SQL commands with CockroachDb using -the PostgreSQL DB API 2 compatible driver. - - pip install psycopg2 - or - pip install psycopg2-binary -""" -from __future__ import generators - -import logging - -import psycopg2 -import psycopg2.extras - -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - -logger = logging.getLogger(__name__) - - -class CockroachDbQuery(object): - - """Parameterized SQL queries using Python DBI protocol...""" - - def __init__(self, dbcon, verbose=True): - self.__dbcon = dbcon - self.__verbose = verbose - self.__ops = ["EQ", "GE", "GT", "LT", "LE", "LIKE", "NOT LIKE"] - self.__opDict = {"EQ": "=", "GE": ">=", "GT": ">", "LT": "<", "LE": "<=", "LIKE": "LIKE", "NOT LIKE": "NOT LIKE"} - self.__logOps = ["AND", "OR", "NOT"] - self.__grpOps = ["BEGIN", "END"] - self.__warningAction = "default" - - def sqlTemplateCommandMany(self, sqlTemplate, valueLists=None, pageSize=100): - """Execute a batch sql commands followed by a single commit. Commands are - are describe in a template with an associated list of values. 
- - psycopg2.extras.execute_batch(cur, sql, argslist, page_size=100) - - """ - try: - - curs = self.__dbcon.cursor() - # curs.executemany(sqlTemplate, valueLists) - psycopg2.extras.execute_batch(curs, sqlTemplate, valueLists, page_size=pageSize) - curs.close() - return True - except psycopg2.DatabaseError as e: - logger.info("sqlTemplate: %s", sqlTemplate) - logger.debug("valueLists: %r", valueLists) - logger.error("Database error is:\n%s", str(e)) - curs.close() - except Warning as e: - logger.warning("Warning is:\n%s", str(e)) - curs.close() - except Exception as e: - logger.exception("Exception is:\n%s", str(e)) - curs.close() - # - return False - - def sqlTemplateCommand(self, sqlTemplate=None, valueList=None): - """Execute sql template command with associated value list. - - Insert one row - - - Errors and warnings that generate exceptions are caught by this method. - """ - valueList = valueList if valueList else [] - try: - curs = self.__dbcon.cursor() - curs.execute(sqlTemplate, valueList) - curs.close() - return True - except psycopg2.DatabaseError as e: - logger.info(" error is:\n%s\n", str(e)) - curs.close() - except Warning as e: - logger.info(" warning is:\n%s\n", str(e)) - curs.close() - except Exception as e: - logger.info(" exception is:\n%s\n", str(e)) - curs.close() - return False - - def sqlTemplateCommandList(self, sqlTemplateValueList=None): - """Execute sql template command with associated value list. - - Input - - - sqlTemplateValueList [(sqlTemplate,vList), (sqlTemplate, vlist), ... ] - - Insert on row - - - Errors and warnings that generate exceptions are caught by this method. 
- """ - vL = [] - iFail = 0 - try: - curs = self.__dbcon.cursor() - # - lenT = len(sqlTemplateValueList) - for ii in range(lenT): - tV, vL = sqlTemplateValueList[ii] - try: - curs.execute(tV, vL) - except Exception as e: - iFail += 1 - logger.info(" Error is: %s", str(e)) - # logger.info(" Template for record %d of %d : %s", ii, lenT, t) - logger.info(" Record %d of %d value list: %s", ii, lenT, vL) - # - curs.close() - logger.debug(" Inserted %d of %d values", ii - iFail, lenT) - return ii - iFail + 1 - except psycopg2.DatabaseError as e: - logger.exception(" error is: %s", str(e)) - logger.info(" Record %d of %d value list: %s", ii, lenT, vL) - curs.close() - except Warning as e: - logger.info(" Warning is: %s", str(e)) - logger.info(" Record %d of %d value list: %s", ii, lenT, vL) - curs.close() - except Exception as e: - logger.info(" Exception is: %s", str(e)) - logger.info(" Record %d of %d value list: %s", ii, lenT, vL) - curs.close() - return ii - iFail + 1 - - def sqlCommandList(self, sqlCommandList): - """Execute the input list of SQL commands catching exceptions from the server. 
- - - The treatment of warning is controlled by a prior setting of self.setWarnings("error"|"ignore"|"default") - """ - - try: - sqlCommand = "" - curs = self.__dbcon.cursor() - for sqlCommand in sqlCommandList: - curs.execute(sqlCommand) - # - curs.close() - return True - except psycopg2.DatabaseError as e: - logger.info(" SQL command failed for:\n%s", sqlCommand) - logger.info(" database error is message is:\n%s", str(e)) - curs.close() - except Warning as e: - logger.info(" warning message is:\n%s", str(e)) - logger.info(" generated warnings for command:\n%s", sqlCommand) - curs.close() - except Exception as e: - logger.info(" exception message is:\n%s\n", str(e)) - logger.exception(" SQL command failed for:\n%s\n", sqlCommand) - curs.close() - - return False - - def sqlCommand(self, queryString): - """Execute SQL command catching exceptions returning no data from the server.""" - try: - curs = self.__dbcon.cursor() - curs.execute(queryString) - curs.close() - return True - except psycopg2.OperationalError as e: - logger.info(" SQL command failed for:\n%s", queryString) - logger.info(" Warning is message is:\n%s", str(e)) - curs.close() - except psycopg2.DatabaseError as e: - logger.info(" SQL command failed for:\n%s", queryString) - logger.info(" Warning is message is:\n%s", str(e)) - curs.close() - except Exception as e: - logger.info(" SQL command failed for:\n%s", queryString) - logger.info(" Warning is message is:\n%s", str(e)) - curs.close() - return [] - - def __fetchIter(self, cursor, rowSize=1000): - """Chunked iterator to manage results fetches to mysql server""" - while True: - results = cursor.fetchmany(rowSize) - if not results: - break - for result in results: - yield result - - def selectRows(self, queryString): - """Execute SQL command and return list of lists for the result set.""" - rowList = [] - try: - curs = self.__dbcon.cursor() - curs.execute(queryString) - while True: - result = curs.fetchone() - if result is not None: - 
rowList.append(result) - else: - break - curs.close() - return rowList - except psycopg2.ProgrammingError as e: - logger.info(" Warning is message is:\n%s", str(e)) - curs.close() - except psycopg2.OperationalError as e: - logger.info(" Warning is message is:\n%s", str(e)) - logger.info(" SQL command failed for:\n%s", queryString) - curs.close() - except psycopg2.DatabaseError as e: - logger.info(" Warning is message is:\n%s", str(e)) - logger.info(" SQL command failed for:\n%s", queryString) - curs.close() - except Exception as e: - logger.info(" Error message is:\n%s", str(e)) - logger.exception(" SQL command failed for:\n%s", queryString) - curs.close() - - return [] - - def simpleQuery(self, selectList=None, fromList=None, condition="", orderList=None, returnObj=None): - """ """ - # - selectList = selectList if selectList else [] - fromList = fromList if fromList else [] - orderList = orderList if orderList else [] - returnObj = returnObj if returnObj else [] - colsCsv = ",".join(["%s" % k for k in selectList]) - tablesCsv = ",".join(["%s" % k for k in fromList]) - - order = "" - if orderList: - (aV, tV) = orderList[0] - order = " ORDER BY CAST(%s AS %s) " % (aV, tV) - for (aV, tV) in orderList[1:]: - order += ", CAST(%s AS %s) " % (aV, tV) - - # - query = "SELECT " + colsCsv + " FROM " + tablesCsv + condition + order - if self.__verbose: - logger.info("Query: %s\n", query) - curs = self.__dbcon.cursor() - curs.execute(query) - while True: - result = curs.fetchone() - if result is not None: - returnObj.append(result) - else: - break - curs.close() - return returnObj - - def testSelectQuery(self, count): - tSQL = "select %d" % count - # - try: - rowL = self.selectRows(queryString=tSQL) - tup = rowL[0] - return int(str(tup[0])) == count - except Exception: - return False diff --git a/rcsb/db/cockroach/Connection.py b/rcsb/db/cockroach/Connection.py deleted file mode 100644 index 2349b3e9..00000000 --- a/rcsb/db/cockroach/Connection.py +++ /dev/null @@ -1,160 +0,0 
@@ -## -# File: Connection.py -# Author: J. Westbrook -# Date: 1-Apr-2018 -# Version: 0.001 Initial version -# -# Updates: -# -## -""" -Utility classes to create connections and process SQL commands with CockroachDb using -the PostgreSQL DB API 2 compatible driver. - - pip install psycopg2 - or - pip install psycopg2-binary -""" -from __future__ import generators - -import copy -import logging - -import psycopg2 -import psycopg2.extras - -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - -# -logger = logging.getLogger(__name__) - - -class Connection(object): - """Class to encapsulate connection semantics for PostgresSQL DBI connection for CockroachDB.""" - - def __init__(self, cfgOb=None, infoD=None, resourceName=None, sectionName="site_info_configuration", verbose=False): - self.__verbose = verbose - - self.__dbcon = None - - self.__infoD = infoD - self.__dbName = None - - self.__dbHost = None - self.__dbUser = None - self.__dbPw = None - self.__dbSocket = None - self.__dbPort = None - self.__dbAdminDb = None - self.__dbPort = None - # - self.__defaultPort = 26257 - self.__dbServer = "cockroach" - self.__resourceName = resourceName - # - self.__cfgOb = cfgOb - # - if infoD: - self.setPreferences(infoD) - # - if resourceName: - self.assignResource(resourceName, sectionName=sectionName) - - def getPreferences(self): - return self.__infoD - - def setPreferences(self, infoD): - try: - self.__infoD = copy.deepcopy(infoD) - self.__dbName = self.__infoD.get("DB_NAME") - self.__dbHost = self.__infoD.get("DB_HOST", "localhost") - self.__dbUser = self.__infoD.get("DB_USER", None) - self.__dbPw = self.__infoD.get("DB_PW", None) - self.__dbSocket = self.__infoD.get("DB_SOCKET", None) - self.__dbServer = self.__infoD.get("DB_SERVER", "cockroach") - # - port = self.__infoD.get("DB_PORT", self.__defaultPort) - if port and str(port): - self.__dbPort = int(str(port)) - except Exception as e: - 
logger.exception("Failing with %s", str(e)) - - def assignResource(self, resourceName=None, sectionName=None): - # - defaultPort = 26257 - defaultHost = "localhost" - dbServer = "cockroach" - defaultDbName = "system" - - self.__resourceName = resourceName - infoD = {} - if not self.__cfgOb: - return infoD - # - if resourceName == "COCKROACH_DB": - infoD["DB_NAME"] = self.__cfgOb.get("COCKROACH_DB_NAME", default=defaultDbName, sectionName=sectionName) - infoD["DB_HOST"] = self.__cfgOb.get("COCKROACH_DB_HOST", sectionName=sectionName) - infoD["DB_SOCKET"] = self.__cfgOb.get("COCKROACH_DB_SOCKET", default=None, sectionName=sectionName) - infoD["DB_PORT"] = int(str(self.__cfgOb.get("COCKROACH_DB_PORT", default=defaultPort, sectionName=sectionName))) - infoD["DB_USER"] = self.__cfgOb.get("COCKROACH_DB_USER_NAME", sectionName=sectionName) - infoD["DB_PW"] = self.__cfgOb.get("COCKROACH_DB_PASSWORD", sectionName=sectionName) - - else: - infoD["DB_NAME"] = self.__cfgOb.get("DB_NAME", default=defaultDbName, sectionName=sectionName) - infoD["DB_HOST"] = self.__cfgOb.get("DB_HOST", default=defaultHost, sectionName=sectionName) - infoD["DB_SOCKET"] = self.__cfgOb.get("DB_SOCKET", default=None, sectionName=sectionName) - infoD["DB_PORT"] = int(str(self.__cfgOb.get("DB_PORT", default=defaultPort, sectionName=sectionName))) - infoD["DB_USER"] = self.__cfgOb.get("DB_USER_NAME", sectionName=sectionName) - infoD["DB_PW"] = self.__cfgOb.get("DB_PASSWORD", sectionName=sectionName) - # - infoD["DB_SERVER"] = dbServer - self.setPreferences(infoD) - # - return copy.deepcopy(infoD) - # - - def connect(self): - """Create a database connection and return a connection object. 
- - Returns None on failure - """ - # - if self.__dbcon is not None: - # Close an open connection - - logger.info("Closing an existing connection.") - self.close() - - try: - if self.__dbPw: - dbcon = psycopg2.connect(database="%s" % self.__dbName, user="%s" % self.__dbUser, password="%s" % self.__dbPw, host="%s" % self.__dbHost, port=self.__dbPort) - else: - dbcon = psycopg2.connect(database="%s" % self.__dbName, user="%s" % self.__dbUser, host="%s" % self.__dbHost, port=self.__dbPort) - - dbcon.set_session(autocommit=True) - self.__dbcon = dbcon - except Exception as e: - logger.error("Failing with %s", str(e)) - self.__dbcon = None - - return self.__dbcon - - def close(self): - """Close any open database connection.""" - if self.__dbcon is not None: - try: - self.__dbcon.close() - self.__dbcon = None - return True - except Exception as e: - logger.exception("Connection close error %s", str(e)) - - return False - - def __enter__(self): - return self.connect() - - def __exit__(self, *args): - return self.close() diff --git a/rcsb/db/cockroach/__init__.py b/rcsb/db/cockroach/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rcsb/db/config/exdb-config-example.yml b/rcsb/db/config/exdb-config-example.yml index ade803d9..c23fba1c 100644 --- a/rcsb/db/config/exdb-config-example.yml +++ b/rcsb/db/config/exdb-config-example.yml @@ -41,18 +41,6 @@ site_info_configuration: MONGO_DB_PORT: "27017" _MONGO_DB_USER_NAME: "" _MONGO_DB_PASSWORD: "" - MYSQL_DB_HOST_NAME: localhost - MYSQL_DB_PORT_NUMBER: "3306" - _MYSQL_DB_USER_NAME: wrIzBGtCsQmkjc7tbEPQ3oEaOnpvivXaKcQsvXD6kn4KHMvA7LCL4O9GlAI= - _MYSQL_DB_PASSWORD: qXPp32Z6DhNVMwo9fQIK5+KB13c1Jd43E3Bn6LmJcSyXc0NAt4H/hwo/xglYpmELV5Vqaw== - _MYSQL_DB_PASSWORD_ALT: s6mNxq3FIwZLrLiIeHpDZQcuVxfQqrR3gA+dEMOGgHwsjrJV5da08H74RmnNRus74Q== - MYSQL_DB_DATABASE_NAME: mysql - CRATE_DB_HOST: localhost - CRATE_DB_PORT: "4200" - COCKROACH_DB_HOST: localhost - COCKROACH_DB_PORT: "26257" - COCKROACH_DB_NAME: system - 
_COCKROACH_DB_USER_NAME: HR2ez8iLbEpvN+hXKIQS3qa6/QpiFRpf/WvrfHiwfjcL09E+iWTQJhsxTsw= # # Primary repository data and related computed repository data paths # @@ -180,7 +168,6 @@ site_info_remote_configuration: #_MONGO_DB_PASSWORD: "" MONGO_DB_ADMIN_DB_NAME: admin # - # # Channel B # MONGO_DB_HOST: 128.6.159.133 # Channel A @@ -190,20 +177,6 @@ site_info_remote_configuration: #_MONGO_DB_PASSWORD: wfm/FNyhON/AGaJfLhL5ZA0ObAVU0DZ0sICB2eemS8GFfAOZvo3Y/vXTkjLayViwppj2MQ== # # - MYSQL_DB_HOST_NAME: localhost - MYSQL_DB_PORT_NUMBER: "3306" - _MYSQL_DB_USER_NAME: wrIzBGtCsQmkjc7tbEPQ3oEaOnpvivXaKcQsvXD6kn4KHMvA7LCL4O9GlAI= - _MYSQL_DB_PASSWORD: qXPp32Z6DhNVMwo9fQIK5+KB13c1Jd43E3Bn6LmJcSyXc0NAt4H/hwo/xglYpmELV5Vqaw== - _MYSQL_DB_PASSWORD_ALT: s6mNxq3FIwZLrLiIeHpDZQcuVxfQqrR3gA+dEMOGgHwsjrJV5da08H74RmnNRus74Q== - MYSQL_DB_DATABASE_NAME: mysql - # - CRATE_DB_HOST: localhost - CRATE_DB_PORT: "4200" - COCKROACH_DB_HOST: localhost - COCKROACH_DB_PORT: "26257" - COCKROACH_DB_NAME: system - _COCKROACH_DB_USER_NAME: HR2ez8iLbEpvN+hXKIQS3qa6/QpiFRpf/WvrfHiwfjcL09E+iWTQJhsxTsw= - # # Primary repository data and related computed repository data paths # BIRD_REPO_PATH: /net/data/components/prd-v3 diff --git a/rcsb/db/crate/Connection.py b/rcsb/db/crate/Connection.py deleted file mode 100644 index 6ee4f132..00000000 --- a/rcsb/db/crate/Connection.py +++ /dev/null @@ -1,155 +0,0 @@ -## -# File: Connection.py -# Author: J. Westbrook -# Date: 1-Apr-2018 -# -# Connection methods for Crate DB. -# -# Updates: -# -## -""" -Connection methods for Crate DB. 
- -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import copy -import logging - -from crate import client - -# from crate.client.exceptions import (DatabaseError, OperationalError, ProgrammingError, Warning) - -logger = logging.getLogger(__name__) - - -class Connection(object): - - """Class to encapsulate Crate RDBMS DBI connection.""" - - def __init__(self, cfgOb=None, infoD=None, resourceName=None, sectionName="site_info_configuration", verbose=False): - self.__verbose = verbose - - self.__db = None - self.__dbcon = None - - self.__infoD = infoD - self.__dbName = None - - self.__dbHost = None - self.__dbUser = None - self.__dbPw = None - self.__dbSocket = None - self.__dbPort = None - self.__dbAdminDb = None - self.__dbPort = None - # - self.__defaultPort = 4200 - self.__dbServer = "crate" - self.__resourceName = resourceName - - self.__cfgOb = cfgOb - # - if infoD: - self.setPreferences(infoD) - # - if resourceName: - self.assignResource(resourceName, sectionName) - - def getPreferences(self): - return self.__infoD - - def setPreferences(self, infoD): - try: - self.__infoD = copy.deepcopy(infoD) - self.__dbName = self.__infoD.get("DB_NAME", None) - self.__dbHost = self.__infoD.get("DB_HOST", "localhost") - self.__dbUser = self.__infoD.get("DB_USER", None) - self.__dbPw = self.__infoD.get("DB_PW", None) - self.__dbSocket = self.__infoD.get("DB_SOCKET", None) - self.__dbServer = self.__infoD.get("DB_SERVER", "crate") - # - port = self.__infoD.get("DB_PORT", self.__defaultPort) - if port and str(port): - self.__dbPort = int(str(port)) - except Exception as e: - logger.exception("Failing with %s", str(e)) - - def assignResource(self, resourceName=None, sectionName=None): - # - defaultPort = 4200 - defaultHost = "localhost" - dbServer = "crate" - - self.__resourceName = resourceName - infoD = {} - if not self.__cfgOb: - return infoD - # - if resourceName == "CRATE_DB": - 
infoD["DB_NAME"] = self.__cfgOb.get("CRATE_DB_NAME", sectionName=sectionName) - infoD["DB_HOST"] = self.__cfgOb.get("CRATE_DB_HOST", sectionName=sectionName) - infoD["DB_SOCKET"] = self.__cfgOb.get("CRATE_DB_SOCKET", default=None, sectionName=sectionName) - infoD["DB_PORT"] = int(str(self.__cfgOb.get("CRATE_DB_PORT", default=defaultPort, sectionName=sectionName))) - infoD["DB_USER"] = self.__cfgOb.get("CRATE_DB_USER_NAME", sectionName=sectionName) - infoD["DB_PW"] = self.__cfgOb.get("CRATE_DB_PASSWORD", sectionName=sectionName) - - else: - infoD["DB_NAME"] = self.__cfgOb.get("DB_NAME", sectionName=sectionName) - infoD["DB_HOST"] = self.__cfgOb.get("DB_HOST", default=defaultHost, sectionName=sectionName) - infoD["DB_SOCKET"] = self.__cfgOb.get("DB_SOCKET", default=None, sectionName=sectionName) - infoD["DB_PORT"] = int(str(self.__cfgOb.get("DB_PORT", default=defaultPort, sectionName=sectionName))) - infoD["DB_USER"] = self.__cfgOb.get("DB_USER_NAME", sectionName=sectionName) - infoD["DB_PW"] = self.__cfgOb.get("DB_PASSWORD", sectionName=sectionName) - # - infoD["DB_SERVER"] = dbServer - self.setPreferences(infoD) - # - return copy.deepcopy(infoD) - # - - def connect(self): - """Create a database connection and return a connection object. 
- - Returns None on failure - """ - # - crateHost = "{host}:{port}".format(host=self.__dbHost, port=self.__dbPort) - crateUri = "http://%s" % crateHost - logger.debug("Connection using uri %s", crateUri) - # - dbcon = client.connect(crateUri) - # - if self.__dbcon is not None: - # Close an open connection - - logger.info("Closing an existing connection.\n") - self.close() - try: - dbcon = self.__dbcon = dbcon - except Exception as e: - logger.exception("Connection error to server %s host %s port %d %s", self.__dbServer, self.__dbHost, self.__dbPort, str(e)) - self.__dbcon = None - - return self.__dbcon - - def close(self): - """Close any open database connection.""" - if self.__dbcon is not None: - try: - self.__dbcon.close() - self.__dbcon = None - return True - except Exception as e: - logger.exception("Connection close error %s", str(e)) - return False - - def __enter__(self): - return self.connect() - - def __exit__(self, *args): - return self.close() diff --git a/rcsb/db/crate/CrateDbLoader.py b/rcsb/db/crate/CrateDbLoader.py deleted file mode 100644 index 96f31bb6..00000000 --- a/rcsb/db/crate/CrateDbLoader.py +++ /dev/null @@ -1,202 +0,0 @@ -## -# File: CrateDbLoader.py -# Author: J. Westbrook -# Date: 1-Apr-2018 -# -# Loader variant to support stripped down support for Crate DB. -# -# Updates: -# -# 31-Mar-2019 jdw add more speific tests for null value suggested by -# issue = MySQL SchemaDefLoader skip zero values #19 -## -## -""" -Generic mapper of PDBx/mmCIF instance data to SQL loadable data files based on external -schema definition defined in class SchemaDefBase(). 
- -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import logging -import time - -from rcsb.db.crate.CrateDbUtil import CrateDbQuery -from rcsb.db.processors.DataTransformFactory import DataTransformFactory -from rcsb.db.processors.SchemaDefDataPrep import SchemaDefDataPrep -from rcsb.db.sql.SqlGen import SqlGenAdmin - - -logger = logging.getLogger(__name__) - - -class CrateDbLoader(object): - - """Map PDBx/mmCIF instance data to SQL loadable data using external schema definition.""" - - def __init__(self, schemaDefObj, ioObj=None, dbCon=None, workPath=".", cleanUp=False, warnings="default", verbose=True): - self.__verbose = verbose - self.__debug = False - self.__sD = schemaDefObj - self.__ioObj = ioObj - # - self.__dbCon = dbCon - self.__workingPath = workPath - self.__pathList = [] - self.__cleanUp = cleanUp - # - # self.__sdp = SchemaDefDataPrep(schemaDefAccessObj=schemaDefObj, ioObj=IoAdapter(), verbose=True) - # - self.__warningAction = warnings - # - self.__fTypeRow = "skip-max-width" - dtf = DataTransformFactory(schemaDefAccessObj=self.__sD, filterType=self.__fTypeRow) - self.__sdp = SchemaDefDataPrep(schemaDefAccessObj=self.__sD, dtObj=dtf, workPath=self.__workingPath, verbose=self.__verbose) - # - - def load(self, inputPathList=None, containerList=None, loadType="batch-file", deleteOpt=None, tableIdSkipD=None): - """Load data for each table defined in the current schema definition object. - Data are extracted from the input path or container list. - - Data source options: - - inputPathList = [, ....] - - or - - containerList = [ data container, ...] - - - loadType = ['crate-insert' | 'crate-insert-many'] - deleteOpt = 'selected' | 'all' - - tableIdSkipD - searchable container with tableIds to be skipped on loading - - - Loading is performed using the current database server connection. 
- - Intermediate data files for 'batch-file' loading are created in the current working path. - - Returns True for success or False otherwise. - - """ - tableIdSkipD = tableIdSkipD if tableIdSkipD is not None else {} - tableDataDict = {} - containerNameList = [] - if inputPathList is not None: - tableDataDict, containerNameList = self.__sdp.fetch(inputPathList) - elif containerList is not None: - tableDataDict, containerNameList = self.__sdp.process(containerList) - # - if loadType in ["crate-insert", "crate-insert-many"]: - sqlMode = "single" - if loadType in ["crate-insert-many"]: - sqlMode = "many" - for tableId, rowList in tableDataDict.items(): - if tableId in tableIdSkipD: - continue - if deleteOpt in ["all", "selected"] or rowList: - self.__crateInsertImport(tableId, rowList=rowList, containerNameList=containerNameList, deleteOpt=deleteOpt, sqlMode=sqlMode) - return True - else: - pass - - return False - - def __crateInsertImport(self, tableId, rowList=None, containerNameList=None, deleteOpt="selected", sqlMode="many", refresh=True): - """Load the input table using sql crate templated inserts of the input rowlist of dictionaries (i.e. d[attributeId]=value). - - The containerNameList corresponding to the data within loadable data in rowList can be provided - if 'selected' deletions are to performed prior to the the batch data inserts. - - deleteOpt = ['selected','all'] where 'selected' deletes rows corresponding to the input container - list before insert. The 'all' options truncates the table prior to insert. - - Deletions are performed in the absence of loadable data. 
- - """ - startTime = time.time() - sqlRefresh = None - crQ = CrateDbQuery(dbcon=self.__dbCon, verbose=self.__verbose) - sqlGen = SqlGenAdmin(self.__verbose) - # - databaseName = self.__sD.getVersionedDatabaseName() - tableDefObj = self.__sD.getSchemaObject(tableId) - tableName = tableDefObj.getName() - tableAttributeIdList = tableDefObj.getAttributeIdList() - tableAttributeNameList = tableDefObj.getAttributeNameList() - # - sqlDeleteList = None - if deleteOpt in ["selected", "delete"] and containerNameList is not None: - deleteAttributeName = tableDefObj.getDeleteAttributeName() - sqlDeleteList = sqlGen.deleteFromListSQL(databaseName, tableName, deleteAttributeName, containerNameList, chunkSize=10) - logger.debug("Delete SQL for %s : %r", tableId, sqlDeleteList) - elif deleteOpt in ["all", "truncate"]: - sqlDeleteList = [sqlGen.truncateTableSQL(databaseName, tableName)] - # - logger.debug("Deleting from table %s length %d", tableName, len(containerNameList)) - crQ.sqlCommandList(sqlDeleteList) - logger.debug("Delete commands %s", sqlDeleteList) - if not rowList: - return True - if refresh: - sqlRefresh = sqlGen.refreshTableSQLCrate(databaseName, tableName) - crQ.sqlCommand(sqlRefresh) - # - logger.info("Insert begins for table %s with row length %d", tableName, len(rowList)) - sqlInsertList = [] - tupL = list(zip(tableAttributeIdList, tableAttributeNameList)) - if sqlMode == "many": - aList = [] - for tId, nm in tupL: - aList.append(nm) - # - vLists = [] - for row in rowList: - vList = [] - for tId, nm in tupL: - if row[tId] and row[tId] != r"\N": - vList.append(row[tId]) - else: - vList.append(None) - vLists.append(vList) - # - lenT = len(vLists) - lenR = crQ.sqlTemplateCommandMany(sqlTemplate=sqlGen.insertTemplateSQLCrate(databaseName, tableName, aList), valueLists=vLists) - ret = lenR == len(vLists) - else: - aList = [] - for tId, nm in tupL: - aList.append(nm) - # - for row in rowList: - vList = [] - for tId, nm in tupL: - if row[tId] is not None and row[tId] 
!= r"\N": - vList.append(row[tId]) - else: - vList.append(None) - sqlInsertList.append((sqlGen.insertTemplateSQLCrate(databaseName, tableName, aList), vList)) - # - lenT = len(sqlInsertList) - lenR = crQ.sqlTemplateCommandList(sqlInsertList) - ret = lenR == lenT - if refresh: - sqlRefresh = sqlGen.refreshTableSQLCrate(databaseName, tableName) - crQ.sqlCommand(sqlRefresh) - # - endTime = time.time() - if ret: - logger.info( - "Insert succeeds for table %s %d of %d rows at %s (%.3f seconds)", tableName, lenR, lenT, time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime - ) - else: - logger.info("Insert fails for table %s %d of %d rows at %s (%.3f seconds)", tableName, lenR, lenT, time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime) - return ret - - -if __name__ == "__main__": - pass diff --git a/rcsb/db/crate/CrateDbUtil.py b/rcsb/db/crate/CrateDbUtil.py deleted file mode 100644 index 6b35018e..00000000 --- a/rcsb/db/crate/CrateDbUtil.py +++ /dev/null @@ -1,278 +0,0 @@ -## -# File: CrateDbUtil.py -# Author: J. Westbrook -# Date: 21-Dec-2017 -# Version: 0.001 Initial version -# -# Updates: -# -## -""" -Utility classes to create connections and process SQL commands with CrateDb. 
- -""" -from __future__ import generators - -import logging - -from crate.client.exceptions import DatabaseError, OperationalError, ProgrammingError, Warning # pylint: disable=redefined-builtin - -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - -logger = logging.getLogger(__name__) - - -class CrateDbQuery(object): - - """Parameterized SQL queries using Python DBI protocol...""" - - def __init__(self, dbcon, verbose=True): - self.__dbcon = dbcon - self.__verbose = verbose - self.__ops = ["EQ", "GE", "GT", "LT", "LE", "LIKE", "NOT LIKE"] - self.__opDict = {"EQ": "=", "GE": ">=", "GT": ">", "LT": "<", "LE": "<=", "LIKE": "LIKE", "NOT LIKE": "NOT LIKE"} - self.__logOps = ["AND", "OR", "NOT"] - self.__grpOps = ["BEGIN", "END"] - self.__warningAction = "default" - - def sqlTemplateCommandMany(self, sqlTemplate, valueLists=None): - """Execute a batch sql commands followed by a single commit. Commands are - are describe in a template with an associated list of values. - - cursor.executemany("INSERT INTO locations (name, date, kind, position) VALUES (?, ?, ?, ?)", - ... [('Cloverleaf', '2007-03-11', 'Quasar', 7), - ... 
('Old Faithful', '2007-03-11', 'Quasar', 7)]) - [{u'rowcount': 1}, {u'rowcount': 1}] - - """ - lenR = 0 - ret = [] - iFail = 0 - try: - lenT = len(valueLists) - curs = self.__dbcon.cursor() - ret = curs.executemany(sqlTemplate, valueLists) - lenR = len(ret) - logger.debug("Return len %d", len(ret)) - for i, _ in enumerate(ret): - if ret[i]["rowcount"] != 1: - iFail += 1 - logger.info("Insert fails on row %d of %d with values: %r", i, lenT, valueLists[i]) - curs.close() - return lenR - iFail - except DatabaseError as e: - logger.info("sqlTemplate %s", sqlTemplate) - logger.info("return list %r", ret) - logger.error("error is:\n%s", str(e)) - curs.close() - except Warning as e: - logger.warning("warning is:\n%s", str(e)) - curs.close() - except Exception as e: - logger.exception("Exception is:\n%s", str(e)) - curs.close() - # - lenR = len(ret) - return lenR - iFail - - def sqlTemplateCommand(self, sqlTemplate=None, valueList=None): - """Execute sql template command with associated value list. - - Insert one row - - - Errors and warnings that generate exceptions are caught by this method. - """ - valueList = valueList if valueList else [] - try: - curs = self.__dbcon.cursor() - curs.execute(sqlTemplate, valueList) - curs.close() - return True - except DatabaseError as e: - logger.info(" error is:\n%s\n", str(e)) - curs.close() - except Warning as e: - logger.info(" warning is:\n%s\n", str(e)) - curs.close() - except Exception as e: - logger.info(" exception is:\n%s\n", str(e)) - curs.close() - return False - - def sqlTemplateCommandList(self, sqlTemplateValueList=None): - """Execute sql template command with associated value list. - - Input - - - sqlTemplateValueList [(sqlTemplate,vList), (sqlTemplate, vlist), ... ] - - Insert on row - - - Errors and warnings that generate exceptions are caught by this method. 
- """ - vL = [] - iFail = 0 - try: - curs = self.__dbcon.cursor() - # - lenT = len(sqlTemplateValueList) - for ii in range(lenT): - tV, vL = sqlTemplateValueList[ii] - try: - curs.execute(tV, vL) - except Exception as e: - iFail += 1 - logger.info(" Error is: %s", str(e)) - # logger.info(" Template for record %d of %d : %s" % (ii, lenT, t)) - logger.info(" Record %d of %d value list: %s", ii, lenT, vL) - # - curs.close() - logger.debug(" Inserted %d of %d values", ii - iFail, lenT) - return ii - iFail + 1 - except DatabaseError as e: - logger.exception(" error is: %s", str(e)) - logger.info(" Record %d of %d value list: %s", ii, lenT, vL) - curs.close() - except Warning as e: - logger.info(" Warning is: %s", str(e)) - logger.info(" Record %d of %d value list: %s", ii, lenT, vL) - curs.close() - except Exception as e: - logger.info(" Exception is: %s", str(e)) - logger.info(" Record %d of %d value list: %s", ii, lenT, vL) - curs.close() - return ii - iFail + 1 - - def sqlCommandList(self, sqlCommandList): - """Execute the input list of SQL commands catching exceptions from the server. 
- - The treatment of warning is controlled by a prior setting of self.setWarnings("error"|"ignore"|"default") - """ - - try: - sqlCommand = "" - curs = self.__dbcon.cursor() - for sqlCommand in sqlCommandList: - curs.execute(sqlCommand) - # - curs.close() - return True - except DatabaseError as e: - logger.info(" SQL command failed for:\n%s", sqlCommand) - logger.info(" database error is message is:\n%s", str(e)) - curs.close() - except Warning as e: - logger.info(" warning message is:\n%s", str(e)) - logger.info(" generated warnings for command:\n%s", sqlCommand) - curs.close() - except Exception as e: - logger.info(" exception message is:\n%s\n", str(e)) - logger.exception(" SQL command failed for:\n%s\n", sqlCommand) - curs.close() - - return False - - def sqlCommand(self, queryString): - """Execute SQL command catching exceptions returning no data from the server.""" - try: - curs = self.__dbcon.cursor() - curs.execute(queryString) - curs.close() - return True - except OperationalError as e: - logger.info(" SQL command failed for:\n%s", queryString) - logger.info(" warning is message is:\n%s", str(e)) - curs.close() - except DatabaseError as e: - logger.info(" SQL command failed for:\n%s\n", queryString) - logger.info(" MySQL warning is message is:\n%s\n", str(e)) - curs.close() - except Exception as e: - logger.exception(" SQL command failed for:\n%s\n with %s", queryString, str(e)) - curs.close() - return [] - - def __fetchIter(self, cursor, rowSize=1000): - """Chunked iterator to manage results fetches to mysql server""" - while True: - results = cursor.fetchmany(rowSize) - if not results: - break - for result in results: - yield result - - def selectRows(self, queryString): - """Execute SQL command and return list of lists for the result set.""" - rowList = [] - try: - curs = self.__dbcon.cursor() - curs.execute(queryString) - while True: - result = curs.fetchone() - if result is not None: - rowList.append(result) - else: - break - curs.close() - return 
rowList - except ProgrammingError as e: - logger.info(" MySQL warning is message is:\n%s\n", str(e)) - curs.close() - except OperationalError as e: - logger.info(" MySQL warning is message is:\n%s\n", str(e)) - logger.info(" SQL command failed for:\n%s\n", queryString) - curs.close() - except DatabaseError as e: - logger.info(" MySQL warning is message is:\n%s\n", str(e)) - logger.info(" SQL command failed for:\n%s\n", queryString) - curs.close() - except Exception as e: - logger.exception(" SQL command failed for:\n%s\n with %s", queryString, str(e)) - curs.close() - - return [] - - def simpleQuery(self, selectList=None, fromList=None, condition="", orderList=None, returnObj=None): - """ """ - # - selectList = selectList if selectList else [] - fromList = fromList if fromList else [] - orderList = orderList if orderList else [] - returnObj = returnObj if returnObj else [] - colsCsv = ",".join(["%s" % k for k in selectList]) - tablesCsv = ",".join(["%s" % k for k in fromList]) - - order = "" - if orderList: - (aV, tV) = orderList[0] - order = " ORDER BY CAST(%s AS %s) " % (aV, tV) - for (aV, tV) in orderList[1:]: - order += ", CAST(%s AS %s) " % (aV, tV) - - # - query = "SELECT " + colsCsv + " FROM " + tablesCsv + condition + order - if self.__verbose: - logger.info("Query: %s\n", query) - curs = self.__dbcon.cursor() - curs.execute(query) - while True: - result = curs.fetchone() - if result is not None: - returnObj.append(result) - else: - break - curs.close() - return returnObj - - def testSelectQuery(self, count): - tSQL = "select %d" % count - # - try: - rowL = self.selectRows(queryString=tSQL) - tup = rowL[0] - return int(str(tup[0])) == count - except Exception: - return False diff --git a/rcsb/db/crate/__init__.py b/rcsb/db/crate/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rcsb/db/helpers/r.py b/rcsb/db/helpers/r.py deleted file mode 100644 index d4a8651e..00000000 --- a/rcsb/db/helpers/r.py +++ /dev/null @@ -1,53 +0,0 @@ -def 
doRangesOverlap(r1, r2): - if r1.start == r1.stop or r2.start == r2.stop: - return False - return (r1.start < r2.stop and r1.stop > r2.start) or (r1.stop > r2.start and r2.stop > r1.start) - - -def splitRangeList(rngL): - """Separate the input list range objects into sublists of non-overlapping range segments - - Args: - rngL (list): list or range objects - - Returns: - (dict): dictionary of sublists (w/ keys 1,2,3) of non-overlapping range segments - """ - grpD = {} - numG = 0 - try: - rngL.sort(key=lambda r: r.stop - r.start + 1, reverse=True) - for rng in rngL: - inGroup = False - igrp = 0 - for grp, trngL in grpD.items(): - inGroup = any([doRangesOverlap(rng, trng) for trng in trngL]) - if inGroup: - igrp = grp - break - numG = numG if inGroup else numG + 1 - igrp = igrp if inGroup else numG - grpD.setdefault(igrp, []).append(rng) - except Exception as e: - # logger.exception("Failing with %s", str(e)) - print(str(e)) - - return grpD - - -def testRangeSplit(): - tupL = [(1, 2), (1, 3), (1, 10), (11, 20), (19, 25), (30, 100), (1, 100), (200, 300), (350, 1400)] - # tupL.sort(key=lambda t: t[1] - t[0] + 1, reverse=True) - rngL = [] - for tup in tupL: - rngL.append(range(tup[0], tup[1])) - for ii in range(1, len(rngL)): - print(ii) - print(doRangesOverlap(rngL[ii - 1], rngL[ii])) - - grpD = splitRangeList(rngL) - print(grpD) - - -if __name__ == "__main__": - testRangeSplit() diff --git a/rcsb/db/mysql/Connection.py b/rcsb/db/mysql/Connection.py deleted file mode 100644 index 3a909a33..00000000 --- a/rcsb/db/mysql/Connection.py +++ /dev/null @@ -1,89 +0,0 @@ -## -# File: Connection.py -# Date: 25-Mar-2018 J. Westbrook -# -# Update: -# 31-Mar-2018 jdw add context methods -# 23-Oct-2018 jdw add section name config access methods and make this a constructor argument -## -""" -Derived class for managing database credentials from a generic configuration file object. 
- -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import logging -import platform - -import MySQLdb - -from rcsb.db.mysql.ConnectionBase import ConnectionBase - -logger = logging.getLogger(__name__) -# -# -if platform.system() == "Linux": - try: - import sqlalchemy.pool as pool # pylint: disable=import-error - - MySQLdb = pool.manage(MySQLdb, pool_size=12, max_overflow=12, timeout=30, echo=False, use_threadlocal=False) - except Exception as e: - logger.exception("Creating MYSQL connection pool failing with %s", str(e)) - - -class Connection(ConnectionBase): - def __init__(self, cfgOb=None, infoD=None, resourceName=None, sectionName="site_info_configuration", verbose=False): - super(Connection, self).__init__(verbose=verbose) - # - self.__cfgOb = cfgOb - # - if infoD: - self.setPreferences(infoD) - # - if resourceName: - self.assignResource(resourceName, sectionName) - # - - def assignResource(self, resourceName=None, sectionName=None): - """ """ - # - defaultPort = 3306 - dbServer = "mysql" - self._assignResource(resourceName, sectionName) - infoD = {} - # if not self.__cfgOb: - # return infoD - # - if not resourceName or not sectionName: - logger.exception("Missing resource specifiers resourceName %r sectionName %r", resourceName, sectionName) - - if resourceName == "MYSQL_DB": - - infoD["DB_NAME"] = self.__cfgOb.get("MYSQL_DB_DATABASE_NAME", sectionName=sectionName) - infoD["DB_HOST"] = self.__cfgOb.get("MYSQL_DB_HOST_NAME", sectionName=sectionName) - infoD["DB_PORT"] = self.__cfgOb.get("MYSQL_DB_PORT_NUMBER", default=defaultPort, sectionName=sectionName) - infoD["DB_SOCKET"] = self.__cfgOb.get("MYSQL_DB_SOCKET", default=None, sectionName=sectionName) - infoD["DB_USER"] = self.__cfgOb.get("_MYSQL_DB_USER_NAME", sectionName=sectionName) - infoD["DB_PW"] = self.__cfgOb.get("_MYSQL_DB_PASSWORD", sectionName=sectionName) - else: - pass - - infoD["DB_PORT"] = 
int(str(infoD["DB_PORT"])) - infoD["DB_SERVER"] = dbServer - - self.setPreferences(infoD) - # - return infoD - - # - - def __enter__(self): - self.openConnection() - return self.getClientConnection() - - def __exit__(self, *args): - return self.closeConnection() diff --git a/rcsb/db/mysql/ConnectionBase.py b/rcsb/db/mysql/ConnectionBase.py deleted file mode 100644 index 46b6da6a..00000000 --- a/rcsb/db/mysql/ConnectionBase.py +++ /dev/null @@ -1,139 +0,0 @@ -## -# File: ConnectionBase.py -# Date: 25-Mar-2018 J. Westbrook -# -# Update: -## -""" -Base class for managing database connection for MySQL. Application credentials are -handled by the derived class. - -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import copy -import logging -import platform - -import MySQLdb - -logger = logging.getLogger(__name__) -# -# -if platform.system() == "Linux": - try: - import sqlalchemy.pool as pool # pylint: disable=import-error - - MySQLdb = pool.manage(MySQLdb, pool_size=12, max_overflow=12, timeout=30, echo=False, use_threadlocal=False) - except Exception as e: - logger.exception("Creating MYSQL connection pool failing with %s", str(e)) - - -class ConnectionBase(object): - def __init__(self, siteId=None, verbose=False): - self.__verbose = verbose - # - self.__siteId = siteId - - self.__db = None - self._dbCon = None - - self.__infoD = {} - self.__databaseName = None - self.__dbHost = None - self.__dbUser = None - self.__dbPw = None - self.__dbSocket = None - self.__dbPort = None - self.__dbAdminDb = None - self.__dbPort = None - self.__defaultPort = 3306 - self.__dbServer = "mysql" - self.__resourceName = None - self.__sectionName = None - - def assignResource(self, resourceName=None, sectionName=None): - # implement in the derived class - self._assignResource(resourceName, sectionName) - - def _assignResource(self, resourceName, sectionName): - self.__resourceName = resourceName 
- self.__sectionName = sectionName - - def getPreferences(self): - return self.__infoD - - def setPreferences(self, infoD): - try: - self.__infoD = copy.deepcopy(infoD) - self.__databaseName = self.__infoD.get("DB_NAME", None) - self.__dbHost = self.__infoD.get("DB_HOST", "localhost") - self.__dbUser = self.__infoD.get("DB_USER", None) - self.__dbPw = self.__infoD.get("DB_PW", None) - self.__dbSocket = self.__infoD.get("DB_SOCKET", None) - self.__dbServer = self.__infoD.get("DB_SERVER", "mysql") - # - port = self.__infoD.get("DB_PORT", self.__defaultPort) - if port is not None: - self.__dbPort = int(str(port)) - except Exception as e: - logger.exception("Failing with %s", str(e)) - - def openConnection(self): - """Create a database connection and return a connection object. - - Returns None on failure - """ - # - if self._dbCon is not None: - # Close an open connection - - logger.info("+MyDbConnect.connect() WARNING Closing an existing connection.") - self.closeConnection() - - try: - if self.__dbSocket is None: - dbcon = MySQLdb.connect( - db="%s" % self.__databaseName, user="%s" % self.__dbUser, passwd="%s" % self.__dbPw, host="%s" % self.__dbHost, port=self.__dbPort, local_infile=1 - ) - else: - dbcon = MySQLdb.connect( - db="%s" % self.__databaseName, - user="%s" % self.__dbUser, - passwd="%s" % self.__dbPw, - host="%s" % self.__dbHost, - port=self.__dbPort, - unix_socket="%s" % self.__dbSocket, - local_infile=1, - ) - - self._dbCon = dbcon - return True - except Exception as e: - logger.exception("Connection error to resource %s with %s", self.__resourceName, str(e)) - self._dbCon = None - - return False - - def getClientConnection(self): - return self._dbCon - - def closeConnection(self): - """Close db session""" - if self._dbCon is not None: - self._dbCon.close() - self._dbCon = None - return True - else: - return False - - def getCursor(self): - try: - return self._dbCon.cursor() - except Exception as e: - logger.exception("Failing with %s", str(e)) - - 
return None diff --git a/rcsb/db/mysql/MyDbAdapter.py b/rcsb/db/mysql/MyDbAdapter.py deleted file mode 100644 index 97382b82..00000000 --- a/rcsb/db/mysql/MyDbAdapter.py +++ /dev/null @@ -1,454 +0,0 @@ -## -# File: MyDdAdapter.py -# Date: 10-April-2014 J.Westbrook -# -# Updates: -# -# 11-April-2014 jdw Generalized from WFTaskRequestDBAdapter.py -# 13-April-2014 jdw working with workflow schema WFTaskRequest() - -# 19-Feb -2015 jdw various fixes -# 10-July -2015 jdw Change method/class names from MySqlGen -# 10-March-2018 jdw Py2->Py3 compatibility using driver fork described at https://mysqlclient.readthedocs.io/user_guide.html# -# 29-March-2018 jdw remove dependency on wwPDB configuration - Use generic configuratio object in constructor - -# 9-July -2018 jdw flip back to time.time() -# -### -## -""" -Database adapter for managing simple access and persistance queries using a MySQL relational database store. -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import copy -import logging -import time - -from rcsb.db.mysql.Connection import Connection -from rcsb.db.mysql.MyDbUtil import MyDbQuery -from rcsb.db.sql.SqlGen import SqlGenAdmin, SqlGenCondition, SqlGenQuery - -logger = logging.getLogger(__name__) - - -class MyDbAdapter(object): - - """Database adapter for managing simple access and persistance queries using a MySQL relational database store.""" - - def __init__(self, schemaDefObj, cfgOb=None, verbose=False): - self.__verbose = verbose - self.__debug = False - self.__cfgOb = cfgOb - # - self.__sD = schemaDefObj - self.__databaseName = self.__sD.getDatabaseName() - self.__dbCon = None - self.__cObj = None - self.__defaultD = {} - self.__attributeParameterMap = {} - self.__attributeConstraintParameterMap = {} - - def __open(self, infoD): - cObj = Connection() - cObj.setPreferences(infoD) - ok = cObj.openConnection() - if ok: - return cObj - else: - return None - - 
def __close(self, cObj): - if cObj is not None: - cObj.closeConnection() - return True - else: - return False - - def __getClientConnection(self, cObj): - return cObj.getClientConnection() - - def _open(self, dbServer=None, dbHost=None, dbName=None, dbUser=None, dbPw=None, dbSocket=None, dbPort=None): - """Open a connection to the data base server hosting WF status and tracking data - - - Internal configuration details will be used if these are not externally supplied. - """ - infoD = {} - infoD["DB_HOST"] = dbHost if dbHost is not None else self.__cfgOb.get("SITE_DB_HOST_NAME") - infoD["DB_PORT"] = dbPort if dbPort is not None else self.__cfgOb.get("SITE_DB_PORT_NUMBER") - infoD["DB_NAME"] = dbName if dbName is not None else self.__cfgOb.get("SITE_DB_DATABASE_NAME") - infoD["DB_USER"] = dbUser if dbUser is not None else self.__cfgOb.get("SITE_DB_USER_NAME") - infoD["DB_PW"] = dbPw if dbPw is not None else self.__cfgOb.get("SITE_DB_PASSWORD") - infoD["DB_SERVER"] = dbServer if dbServer is not None else self.__cfgOb.get("SITE_DB_SERVER") - infoD["DB_SOCKET"] = dbSocket if dbSocket is not None else self.__cfgOb.get("SITE_DB_SOCKET") - # - self.__cObj = self.__open(infoD) - self.__dbCon = self.__getClientConnection(self.__cObj) - return self.__dbCon is not None - - def _close(self): - """Close connection to the data base server hosting WF status and tracking data -""" - if self.__dbCon is not None: - self.__close(self.__cObj) - self.__dbCon = None - self.__cObj = None - - def _setDebug(self, flag=True): - self.__debug = flag - - def _setDataStore(self, dataStoreName): - """Set/reassign the database for all subsequent transactions.""" - self.__databaseName = dataStoreName - - def _getParameterDefaultValues(self, contextId): - if contextId is not None and contextId in self.__defaultD: - return self.__defaultD[contextId] - else: - return {} - - def _setParameterDefaultValues(self, contextId, valueD): - """Set the optional lookup dictionary of default values for 
unspecified parameters... - - valueD = { 'paramName1': , 'paramName2' : , ... } - """ - self.__defaultD[contextId] = copy.deepcopy(valueD) - return True - - def _setAttributeParameterMap(self, tableId, mapL): - """Set list of correspondences between method parameters and table attribute IDs. - - These correspondences are used to map key-value parameter pairs to their associated table attribute values. - - mapL=[ (atId1,paramName1),(atId2,paramName2),... ] - """ - self.__attributeParameterMap[tableId] = mapL - return True - - def _getDefaultAttributeParameterMap(self, tableId): - """Return default attributeId parameter name mappings for the input tableId. - - mapL=[ (atId1,paramName1),(atId2,paramName2),... ] - """ - return self.__sD.getDefaultAttributeParameterMap(tableId) - - def _getAttributeParameterMap(self, tableId): - """ - For the input table return the method keyword argument name to table attribute mapping - - """ - if tableId is not None and tableId in self.__attributeParameterMap: - return self.__attributeParameterMap[tableId] - else: - return [] - - def _getConstraintParameterMap(self, tableId): - """ - For the input table return the method keyword argument name to table attribute mapping for - those attributes that serve as constraints for update transactions - - - """ - if tableId is not None and tableId in self.__attributeConstraintParameterMap: - return self.__attributeConstraintParameterMap[tableId] - else: - return [] - - def _setConstraintParameterMap(self, tableId, mapL): - """Set list of correspondences between method parameters and table attribute IDs to be used as - contraints in update operations. - - These correspondences are used to map key-value paramter pairs to their associated table attribute values. - - mapL=[ (atId1,paramName1),(atId2,paramName2),... 
] - """ - self.__attributeConstraintParameterMap[tableId] = mapL - return True - - def _createSchema(self): - """Create table schema using the current class schema definition""" - if self.__debug: - startTime = time.time() - logger.info("Starting at %s", time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - ret = False - try: - iOpened = False - if self.__dbCon is None: - self._open() - iOpened = True - # - tableIdList = self.__sD.getSchemaIdList() - myQ = MyDbQuery(dbcon=self.__dbCon, verbose=self.__verbose) - myAd = SqlGenAdmin(self.__verbose) - - for tableId in tableIdList: - sqlL = [] - tableDefObj = self.__sD.getSchemaObject(tableId) - sqlL.extend(myAd.createTableSQL(databaseName=self.__databaseName, tableDefObj=tableDefObj)) - - ret = myQ.sqlCommand(sqlCommandList=sqlL) - if self.__verbose: - logger.info("For tableId %s server returns: %s\n", tableId, ret) - if self.__debug: - logger.info("SQL: %s\n", "\n".join(sqlL)) - if iOpened: - self._close() - except Exception as e: - status = " table create error " + str(e) - logger.info("status %s\n", status) - if self.__verbose: - logger.exception("Failing with %s", str(e)) - - if self.__debug: - endTime = time.time() - logger.info("Completed at %s (%.3f seconds)\n", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime) - return ret - - def _getSecondsSinceEpoch(self): - """Return number of seconds since the epoch at the precision of the local installation. - Typically a floating point value with microsecond precision. - - This is used as the default time reference (e.g. timestamp) for monitoring task requests. - """ - return time.time() - - def _insertRequest(self, tableId, contextId, **kwargs): - """Insert into the input table using the keyword value pairs provided as input arguments - - - The contextId controls the handling default values for unspecified parameters. 
- """ - startTime = time.time() - if self.__debug: - logger.info("Starting at %s\n", time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - ret = False - try: - iOpened = False - if self.__dbCon is None: - self._open() - iOpened = True - # - tableDefObj = self.__sD.getSchemaObject(tableId) - # - myQ = MyDbQuery(dbcon=self.__dbCon, verbose=self.__verbose) - myAd = SqlGenAdmin(self.__verbose) - defaultValD = self._getParameterDefaultValues(contextId=contextId) - # - # Create the attribute and value list for template -- - # - vList = [] - aList = [] - for atId, kwId in self._getAttributeParameterMap(tableId=tableId): - if kwId in kwargs and kwargs[kwId] is not None: - vList.append(kwargs[kwId]) - aList.append(atId) - else: - # use the default values if these exist - if kwId in defaultValD and defaultValD[kwId] is not None: - vList.append(defaultValD[kwId]) - aList.append(atId) - else: - # appropriate null handling -- all fields must be assigned on insert -- - vList.append(tableDefObj.getAppNullValue(atId)) - aList.append(atId) - - sqlT = myAd.idInsertTemplateSQL(self.__databaseName, tableDefObj, insertAttributeIdList=aList) - if self.__debug: - logger.info("aList %d vList %d\n", len(aList), len(vList)) - logger.info("insert template sql=\n%s\n", sqlT) - logger.info("insert values vList=\n%r\n", vList) - # sqlC = sqlT % vList - # logger.info("insert sql command =\n%s\n", sqlC) - ret = myQ.sqlTemplateCommand(sqlTemplate=sqlT, valueList=vList) - if iOpened: - self._close() - - except Exception as e: - status = " insert operation error " + str(e) - logger.info("status %s\n", status) - if self.__verbose: - logger.exception("Failing with %s", str(e)) - if self.__debug: - endTime = time.time() - logger.info("Completed %s (%.3f seconds)\n", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime) - - return ret - - def _updateRequest(self, tableId, contextId, **kwargs): - """Update the input table using the keyword value pairs provided as input arguments - 
- - The contextId controls the handling default values for unspecified parameters. - - """ - startTime = time.time() - if self.__debug: - logger.info("Starting at %s", time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - ret = False - try: - iOpened = False - if self.__dbCon is None: - self._open() - iOpened = True - # - tableDefObj = self.__sD.getSchemaObject(tableId) - # - myQ = MyDbQuery(dbcon=self.__dbCon, verbose=self.__verbose) - myAd = SqlGenAdmin(self.__verbose) - defaultValD = self._getParameterDefaultValues(contextId=contextId) - cIdList = self._getConstraintParameterMap(tableId) - - # - # create the value list for template -- - # - vList = [] - aList = [] - cList = [] - for atId, kwId in self._getAttributeParameterMap(tableId): - if (atId, kwId) in cIdList: - continue - if kwId in kwargs and kwargs[kwId] is not None: - vList.append(kwargs[kwId]) - aList.append(atId) - else: - if kwId in defaultValD and defaultValD[kwId] is not None: - vList.append(defaultValD[kwId]) - aList.append(atId) - - for atId, kwId in cIdList: - if kwId in kwargs and kwargs[kwId] is not None: - vList.append(kwargs[kwId]) - cList.append(atId) - - sqlT = myAd.idUpdateTemplateSQL(self.__databaseName, tableDefObj, updateAttributeIdList=aList, conditionAttributeIdList=cList) - if self.__debug: - logger.info("update sql: %s", sqlT) - logger.info("update values: %r", vList) - ret = myQ.sqlTemplateCommand(sqlTemplate=sqlT, valueList=vList) - if iOpened: - self._close() - - except Exception as e: - status = " update operation error " + str(e) - logger.info("status %s", status) - if self.__verbose: - logger.exception("Failing with %s", str(e)) - if self.__debug: - endTime = time.time() - logger.info("Completed at %s (%.3f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime) - return ret - - def _select(self, tableId, **kwargs): - """Construct a selection query for input table and optional constraints provided as keyword value pairs in the - input 
arguments. Return a list of dictionaries of these query details including all table attributes. - """ - startTime = time.time() - if self.__debug: - logger.info("Starting at %s", time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - rdList = [] - try: - iOpened = False - if self.__dbCon is None: - self._open() - iOpened = True - # - tableDefObj = self.__sD.getSchemaObject(tableId) - myQ = MyDbQuery(dbcon=self.__dbCon, verbose=self.__verbose) - sqlGen = SqlGenQuery(schemaDefObj=self.__sD, verbose=self.__verbose) - sqlGen.setDatabase(databaseName=self.__databaseName) - sqlConstraint = SqlGenCondition(schemaDefObj=self.__sD, verbose=self.__verbose) - # - atMapL = self._getAttributeParameterMap(tableId=tableId) - for kwArg, _ in kwargs.items(): - for atId, kwId in atMapL: - if kwId == kwArg: - if tableDefObj.isAttributeStringType(atId): - cTup = ((tableId, atId), "EQ", (kwargs[kwId], "CHAR")) - else: - cTup = ((tableId, atId), "EQ", (kwargs[kwId], "OTHER")) - sqlConstraint.addValueCondition(cTup[0], cTup[1], cTup[2]) - break - # - # Add optional constraints OR ordering by primary key attributes - if sqlConstraint.get(): - sqlGen.setCondition(sqlConstraint) - else: - for atId in tableDefObj.getPrimaryKeyAttributeIdList(): - sqlGen.addOrderByAttributeId(attributeTuple=(tableId, atId)) - - atIdList = self.__sD.getAttributeIdList(tableId) - for atId in atIdList: - sqlGen.addSelectAttributeId(attributeTuple=(tableId, atId)) - # - sqlS = sqlGen.getSql() - if self.__debug: - logger.info("selection sql: %s", sqlS) - - rowList = myQ.selectRows(queryString=sqlS) - sqlGen.clear() - # - # return the result set as a list of dictionaries - # - for iRow, row in enumerate(rowList): - rD = {} - for colVal, atId in zip(row, atIdList): - rD[atId] = colVal - if self.__debug: - logger.info("result set row %d dictionary %r", iRow, rD.items()) - rdList.append(rD) - if iOpened: - self._close() - except Exception as e: - status = " operation error " + str(e) - logger.info("status %s", 
status) - if self.__verbose: - logger.exception("Failing with %s", str(e)) - - if self.__debug: - endTime = time.time() - logger.info("Completed at %s (%.3f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime) - return rdList - - def _deleteRequest(self, tableId, **kwargs): - """Delete from input table records identified by the keyword value pairs provided as input arguments -""" - startTime = time.time() - if self.__debug: - logger.info("Starting at %s", time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - ret = False - try: - iOpened = False - if self.__dbCon is None: - self._open() - iOpened = True - - tableDefObj = self.__sD.getSchemaObject(tableId) - # - # - myQ = MyDbQuery(dbcon=self.__dbCon, verbose=self.__verbose) - myAd = SqlGenAdmin(self.__verbose) - # - # Create the attribute and value list for template -- - # - vList = [] - aList = [] - for atId, kwId in self._getAttributeParameterMap(tableId): - if kwId in kwargs and kwargs[kwId] is not None: - vList.append(kwargs[kwId]) - aList.append(atId) - - sqlT = myAd.idDeleteTemplateSQL(self.__databaseName, tableDefObj, conditionAttributeIdList=aList) - if self.__debug: - logger.info("delete sql: %s", sqlT) - logger.info("delete values: %r", vList) - ret = myQ.sqlTemplateCommand(sqlTemplate=sqlT, valueList=vList) - - if iOpened: - self._close() - - except Exception as e: - status = " delete operation error " + str(e) - logger.info("status %s", status) - if self.__verbose: - logger.exception("Failing with %s", str(e)) - - if self.__debug: - endTime = time.time() - logger.info("Completedat %s (%.3f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime) - return ret diff --git a/rcsb/db/mysql/MyDbUtil.py b/rcsb/db/mysql/MyDbUtil.py deleted file mode 100644 index e15df4c5..00000000 --- a/rcsb/db/mysql/MyDbUtil.py +++ /dev/null @@ -1,316 +0,0 @@ -## -# File: MyDdUtil.py -# Author: J. 
Westbrook -# Date: 27-Jan-2012 -# Version: 0.001 Initial version -# -# Updates: -# 27-Jan-2012 Jdw Refactored and consolidated MySQL utilities from various sources -# 31-Jan-2012 Jdw Move SQL generators to a separate class - -# 9-Jan-2013 jdw add parameters to connection method to permit batch file loading. -# 11-Jan-2013 jdw make mysql warnings generate exceptions. -# 21-Jan-2013 jdw adjust the dbapi command order for processing sql command lists - -# tested with batch loading using truncate/load & delete from /load -# 11-Jul-2013 jdw add optional parameter for database socket - -# 11-Nov-2014 jdw add authentication via dictionary object - -# 3-Mar-2016 jdw add port parameter option to connect method - -# 11-Aug-2016 jdw add connection pool wrapper -# 11-Aug-2016 jdw add chunked fetch method -# -# 10-Mar-2018 jdw Py2->Py3 compatibility using driver fork described at https://mysqlclient.readthedocs.io/user_guide.html# -# 25-Mar-2018 jdw Connection class moved Connection/ConnectionBase -# 30-Mar-2018 jdw adjust the exception handing -- and control of warnings -# -## -""" -Utility classes to create connections and process SQL commands with a MySQL RDBMS. 
- -""" - -from __future__ import generators - -import logging -import warnings - -# pylint: disable=no-member -import MySQLdb - -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - -logger = logging.getLogger(__name__) - -# -# Pooling seems to be broken for Py3 on MACos - -# if platform.system() == "Linux": -# try: -# import sqlalchemy.pool as pool -# MySQLdb = pool.manage(MySQLdb, pool_size=12, max_overflow=12, timeout=30, echo=True, use_threadlocal=True) -# except Exception as e: -# pass - - -class MyDbQuery(object): - - """Parameterized SQL queries using Python DBI protocol...""" - - def __init__(self, dbcon, verbose=True): - self.__dbcon = dbcon - self.__verbose = verbose - self.__ops = ["EQ", "GE", "GT", "LT", "LE", "LIKE", "NOT LIKE"] - self.__opDict = {"EQ": "=", "GE": ">=", "GT": ">", "LT": "<", "LE": "<=", "LIKE": "LIKE", "NOT LIKE": "NOT LIKE"} - self.__logOps = ["AND", "OR", "NOT"] - self.__grpOps = ["BEGIN", "END"] - self.__warningAction = "default" - - def sqlBatchTemplateCommand(self, templateValueList, prependSqlList=None): - """Execute a batch sql commands followed by a single commit. Commands are - are describe in a template with an associated list of values. - - prependSqlList = Optional list of SQL commands to be executed prior to any - batch template commands. - - Errors and warnings that generate exceptions are caught by this method. 
- """ - with warnings.catch_warnings(): - self.__setWarningHandler() - try: - tpl = "" - vL = [] - curs = self.__dbcon.cursor() - if (prependSqlList is not None) and prependSqlList: - sqlCommand = "\n".join(prependSqlList) - curs.execute(sqlCommand) - - for tpl, vL in templateValueList: - curs.execute(tpl, vL) - self.__dbcon.commit() - curs.close() - return True - except MySQLdb.Error as e: - logger.info("MySQL error message is:\n%s\n", str(e)) - logger.error("SQL command failed for:\n%s\n", (tpl % tuple(vL))) - self.__dbcon.rollback() - curs.close() - except MySQLdb.Warning as e: - logger.info("MySQL warning message is:\n%s\n", str(e)) - logger.info("SQL Command generated warnings for command:\n%s\n", (tpl % tuple(vL))) - self.__dbcon.rollback() - curs.close() - except Exception as e: - logger.info("SQL Command generated exception for command:\n%s\n", (tpl % tuple(vL))) - logger.exception("Failing with %s", str(e)) - self.__dbcon.rollback() - curs.close() - return False - - def sqlTemplateCommand(self, sqlTemplate=None, valueList=None): - """Execute sql template command with associated value list. - - Errors and warnings that generate exceptions are caught by this method. 
- """ - vList = valueList if valueList else [] - with warnings.catch_warnings(): - self.__setWarningHandler() - try: - curs = self.__dbcon.cursor() - curs.execute(sqlTemplate, vList) - self.__dbcon.commit() - curs.close() - return True - except MySQLdb.Error as e: - logger.info("SQL command failed for:\n%s\n", (sqlTemplate % tuple(vList))) - logger.error("MySQL error message is:\n%s\n", str(e)) - self.__dbcon.rollback() - curs.close() - except MySQLdb.Warning as e: - logger.info("MYSQL warnings for command:\n%s\n", (sqlTemplate % tuple(vList))) - logger.warning("MySQL warning message is:\n%s\n", str(e)) - self.__dbcon.rollback() - curs.close() - except Exception as e: - logger.info("SQL Command generated warnings command:\n%s\n", (sqlTemplate % tuple(vList))) - logger.exception("Failing with %s", str(e)) - self.__dbcon.rollback() - curs.close() - return False - - def setWarning(self, action): - if action in ["error", "ignore", "default"]: - self.__warningAction = action - return True - else: - self.__warningAction = "default" - return False - - def __setWarningHandler(self): - """'error' will map all MySQL warnings to exceptions - - - 'ignore' will completely suppress warnings - - other settings may print warning directly to stderr - """ - if self.__warningAction == "error": - warnings.simplefilter("error", category=MySQLdb.Warning) - elif self.__warningAction in ["ignore", "default"]: - warnings.simplefilter(self.__warningAction) - else: - warnings.simplefilter("default") - - def sqlCommand(self, sqlCommandList): - """Execute the input list of SQL commands catching exceptions from the server. 
- - The treatment of warning is controlled by a prior setting of self.setWarnings("error"|"ignore"|"default") - - category=MySQLdb.Warning - - """ - with warnings.catch_warnings(): - self.__setWarningHandler() - curs = None - try: - sqlCommand = "" - curs = self.__dbcon.cursor() - for sqlCommand in sqlCommandList: - curs.execute(sqlCommand) - # - self.__dbcon.commit() - curs.close() - return True - except MySQLdb.Error as e: - logger.info("SQL command failed for:\n%s\n", sqlCommand) - logger.error("MySQL error is message is:\n%s\n", str(e)) - # self.__dbcon.rollback() - if curs: - curs.close() - except MySQLdb.Warning as e: - logger.info("SQL generated warnings for command:\n%s\n", sqlCommand) - logger.warning("MySQL warning message is:\n%s\n", str(e)) - # self.__dbcon.rollback() - if curs: - curs.close() - return True - except Exception as e: - logger.info("SQL command failed for:\n%s\n", sqlCommand) - logger.exception("Failing with %s", str(e)) - # self.__dbcon.rollback() - if curs: - curs.close() - - return False - - def sqlCommand2(self, queryString): - """Execute SQL command catching exceptions returning no data from the server.""" - curs = None - with warnings.catch_warnings(): - self.__setWarningHandler() - try: - curs = self.__dbcon.cursor() - curs.execute(queryString) - curs.close() - return True - except MySQLdb.ProgrammingError as e: - logger.error("MySQL warning is message is:\n%s\n", str(e)) - if curs: - curs.close() - except MySQLdb.OperationalError as e: - logger.info("SQL command failed for:\n%s\n", queryString) - logger.info("MySQL warning is message is:\n%s\n", str(e)) - if curs: - curs.close() - except MySQLdb.Error as e: - logger.info("SQL command failed for:\n%s\n", queryString) - logger.info("MySQL warning is message is:\n%s\n", str(e)) - if curs: - curs.close() - except Exception as e: - logger.info("SQL command failed for:\n%s\n", queryString) - if curs: - curs.close() - logger.exception("Failing with %s", str(e)) - return [] - - def 
__fetchIter(self, cursor, rowSize=1000): - """Chunked iterator to manage results fetches to mysql server""" - while True: - results = cursor.fetchmany(rowSize) - if not results: - break - for result in results: - yield result - - def selectRows(self, queryString): - """Execute SQL command and return list of lists for the result set.""" - rowList = [] - with warnings.catch_warnings(): - warnings.simplefilter("error") - try: - curs = self.__dbcon.cursor() - curs.execute(queryString) - while True: - result = curs.fetchone() - if result is not None: - rowList.append(result) - else: - break - curs.close() - return rowList - except MySQLdb.ProgrammingError as e: - logger.warning("MySQL warning is message is:\n%s\n", str(e)) - curs.close() - except MySQLdb.OperationalError as e: - logger.info("SQL command failed for:\n%s\n", queryString) - logger.warning("MySQL warning is message is:\n%s\n", str(e)) - curs.close() - except MySQLdb.Error as e: - logger.info("SQL command failed for:\n%s\n", queryString) - logger.error("MySQL warning is message is:\n%s\n", str(e)) - curs.close() - except Exception as e: - logger.info("SQL command failed for:\n%s\n", queryString) - logger.exception("Failing with %s", str(e)) - curs.close() - - return [] - - def simpleQuery(self, selectList, fromList, condition="", orderList=None, returnObj=None): - """ """ - # - oL = orderList if orderList else [] - retObj = returnObj if returnObj else [] - # - colsCsv = ",".join(["%s" % k for k in selectList]) - tablesCsv = ",".join(["%s" % k for k in fromList]) - - order = "" - if oL: - (sV, tV) = oL[0] - order = " ORDER BY CAST(%s AS %s) " % (sV, tV) - for (sV, tV) in oL[1:]: - order += ", CAST(%s AS %s) " % (sV, tV) - - # - query = "SELECT " + colsCsv + " FROM " + tablesCsv + condition + order - logger.debug("Query: %s\n", query) - curs = self.__dbcon.cursor() - curs.execute(query) - while True: - result = curs.fetchone() - if result is not None: - retObj.append(result) - else: - break - curs.close() - 
return retObj - - def testSelectQuery(self, count): - tSQL = "select %d" % count - # - try: - rowL = self.selectRows(queryString=tSQL) - tup = rowL[0] - return int(str(tup[0])) == count - except Exception: - return False diff --git a/rcsb/db/mysql/MysqlSchemaImporter.py b/rcsb/db/mysql/MysqlSchemaImporter.py deleted file mode 100644 index d5491a86..00000000 --- a/rcsb/db/mysql/MysqlSchemaImporter.py +++ /dev/null @@ -1,151 +0,0 @@ -## -# File: MysqlSchemaImporter.py -# Date: 20-May-2015 jdw -# -# Create a skeleton schema map definition data structure from mysql -# table descriptions. -# -# ** This is a command-line utility that is not part of the overall api -# -import logging -import os -import pprint -import sys - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() -logger.setLevel(logging.INFO) -logger = logging.getLogger(__name__) - - -class MysqlSchemaImporter(object): - def __init__(self, dbUser, dbPw, dbHost, mysqlPath="/opt/local/bin/mysql", verbose=True): - self.__verbose = verbose - self.__mysqlPath = mysqlPath - self.__dbUser = dbUser - self.__dbPw = dbPw - self.__dbHost = dbHost - - def __import(self, filePath): - colDataList = [] - ifh = open(filePath, "r", encoding="utf-8") - for line in ifh: - if line is not None and line: - fields = str(line[:-1]).split("\t") - if len(fields) != 6: - logger.info("bad line in %s = %s", filePath, line) - continue - else: - colDataList.append(fields) - ifh.close() - os.remove(filePath) - # - return colDataList[1:] - - def __export(self, filePath, db, tableName): - cmdDetail = ' --user=%s --password=%s --host=%s %s -e "describe %s;" ' % (self.__dbUser, self.__dbPw, self.__dbHost, db, tableName) - cmd = self.__mysqlPath + cmdDetail + " > %s" % filePath - return os.system(cmd) - - def __buildDef(self, dbName, tableName, colDataList): - _ = dbName - defD = {} - tableId = str(tableName).upper() - attIdKeyList = [] - attMap = {} - 
indD = {} - attInfo = {} - attD = {} - for ii, ff in enumerate(colDataList, start=1): - attName = str(ff[0]) - attId = str(attName).upper() - nullFlag = True if ff[2] == "YES" else False - impType = ff[1] - if "(" in impType: - width = impType[impType.find("(") + 1 : -1] - sqlType = impType[: impType.find("(")] - else: - width = 10 - sqlType = impType - precision = 0 - if ff[3] in ["MUL", "PRI"]: - attIdKeyList.append(attId) - keyFlag = True - else: - keyFlag = False - attD[attId] = attName - attMap[attId] = (tableName, attName, None, None) - attInfo[attId] = {"NULLABLE": nullFlag, "ORDER": ii, "PRECISION": precision, "PRIMARY_KEY": keyFlag, "APP_TYPE": sqlType.upper(), "WIDTH": width} - # - dD = {} - dD["ATTRIBUTES"] = tuple(attIdKeyList) - dD["TYPE"] = "UNIQUE" - indD["p1"] = dD - defD["INDICES"] = indD - defD["ATTRIBUTES"] = attD - defD["ATTRIBUTE_INFO"] = attInfo - defD["ATTRIBUTE_MAP"] = attMap - # - defD["SCHEMA_DELETE_ATTRIBUTE"] = attIdKeyList[0] - defD["SCHEMA_ID"] = tableId - defD["SCHEMA_NAME"] = tableName - defD["SCHEMA_TYPE"] = "transactional" - # 'MAP_MERGE_INDICES': {'valence_ref': {'ATTRIBUTES': ('id',), 'TYPE': 'EQUI-JOIN'}}, - tD = {} - tD["ATTRIBUTES"] = tuple(attIdKeyList) - tD["TYPE"] = "EQUI-JOIN" - defD["MAP_MERGE_INDICES"] = {} - defD["MAP_MERGE_INDICES"][tableName] = tD - # - return tableId, defD - - def create(self, dbName, tableNameList): - schemaDef = {} - for tableName in tableNameList: - fn = "mysql-schema-" + tableName + ".txt" - self.__export(fn, dbName, tableName) - colDataList = self.__import(fn) - if colDataList: - logger.info("tableName %s length %d\n", tableName, len(colDataList)) - tableId, defD = self.__buildDef(dbName, tableName, colDataList) - schemaDef[tableId] = defD - # - pprint.pprint(schemaDef, stream=sys.stdout, width=120, indent=3) - - -def importExample(): - # tableNameList = ["entity", "entity_poly", "entity_src_gen", "entity_src_nat", "pdbx_entity_src_syn"] - tableNameList = [ - "PDB_status_information", - 
"audit_author", - "chem_comp", - "citation", - "citation_author", - "deposition_from_09", - "deposition_from_2", - "entity", - "entity_poly", - "pdb_entry", - "pdbx_contact_author", - "pdbx_database_PDB_obs_spr", - "pdbx_database_status_history", - "pdbx_depui_entry_details", - "pdbx_molecule", - "pdbx_molecule_features", - "pdbx_prerelease_seq", - "processing_status", - "rcsb_status", - "rcsb_status_t", - "struct", - "pdbx_database_status_history", - ] - # tableNameList = ['weight_in_asu'] - dbUser = os.getenv("MYSQL_DB_USER") - dbPw = os.getenv("MYSQL_SBKB_PW") - dbHost = "localhost" - msi = MysqlSchemaImporter(dbUser, dbPw, dbHost, mysqlPath="/opt/local/bin/mysql", verbose=True) - msi.create("stat", tableNameList) - - -if __name__ == "__main__": - importExample() diff --git a/rcsb/db/mysql/SchemaDefLoader.py b/rcsb/db/mysql/SchemaDefLoader.py deleted file mode 100644 index 4c93df12..00000000 --- a/rcsb/db/mysql/SchemaDefLoader.py +++ /dev/null @@ -1,461 +0,0 @@ -## -# File: SchemaDefLoader.py -# Author: J. Westbrook -# Date: 7-Jan-2013 -# Version: 0.001 Initial version -# -# Updates: -# 9-Jan-2013 jdw add merging index support for loading tables from multiple -# instance categories. -# 10-Jan-2013 jdw add null value filter and maximum string width checks. -# 13-Jan-2013 jdw provide batch file and batch insert loading modes. -# 15-Jan-2013 jdw add pre-load delete options -# 19-Jan-2012 jdw add IoAdapter -# 20-Jan-2013 jdw add append options for batch file loading -# 20-Jan-2013 jdw provide methods for loading container lists -# 2-Oct-2017 jdw escape null string '\N' and suppress print statements -# 20-Dec-2017 jdw set to use python adapter - -# 30-Dec-2017 jdw add crate server support - 'crate-insert', 'crate-insert-many' -# 4-Jan-2018 jdw add table skipping filters -# 4-Feb-2018 jdw add cockroach server support - 'cockroach-insert', 'cockroach-insert-many' -# 13-Mar-2018 jdw split data loading and data processing operations. 
-# 30-Mar-2018 jdw more refactoring - changing expectations on explicit data types - -# 2-Jul-2018 jdw fix working path -# 20-Aug-2019 jdw add dynamic method invocation - -# 11-Nov-2018 jdw add DrugBank and CCDC mapping path details. -# 31-Mar-2019 jdw add more speific tests for null value suggested by -# issue = MySQL SchemaDefLoader skip zero values #19 -## -""" -Generic mapper of PDBx/mmCIF instance data to SQL loadable data files based on external -schema definition defined in class SchemaDefBase(). - -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import csv -import logging -import os -import time - -from mmcif.api.DictMethodRunner import DictMethodRunner -from rcsb.utils.dictionary.DictionaryApiProviderWrapper import DictionaryApiProviderWrapper -from rcsb.utils.dictionary.DictMethodResourceProvider import DictMethodResourceProvider -from rcsb.db.mysql.MyDbUtil import MyDbQuery -from rcsb.db.processors.DataTransformFactory import DataTransformFactory -from rcsb.db.processors.SchemaDefDataPrep import SchemaDefDataPrep -from rcsb.db.sql.SqlGen import SqlGenAdmin -from rcsb.utils.repository.RepositoryProvider import RepositoryProvider - -logger = logging.getLogger(__name__) -# - - -class SchemaDefLoader(object): - - """Map PDBx/mmCIF instance data to SQL loadable data using external schema definition.""" - - def __init__( - self, - cfgOb, - schemaDefObj, - cfgSectionName="site_info_configuration", - dbCon=None, - cachePath=".", - workPath=".", - cleanUp=False, - warnings="default", - verbose=True, - restoreUseStash=True, - restoreUseGit=True, - providerTypeExcludeL=None, - ): - self.__verbose = verbose - self.__debug = False - self.__cfgOb = cfgOb - sectionName = cfgSectionName - self.__sD = schemaDefObj - - # - self.__dbCon = dbCon - self.__cachePath = cachePath - self.__workPath = workPath - self.__pathList = [] - self.__cleanUp = cleanUp - # - self.__colSep = 
"&##&\t" - self.__rowSep = "$##$\n" - # - # - self.__fTypeRow = "skip-max-width" - self.__fTypeCol = "skip-max-width" - # - self.__warningAction = warnings - dtf = DataTransformFactory(schemaDefAccessObj=self.__sD, filterType=self.__fTypeRow) - self.__sdp = SchemaDefDataPrep(schemaDefAccessObj=self.__sD, dtObj=dtf, workPath=self.__cachePath, verbose=self.__verbose) - self.__rpP = RepositoryProvider(cfgOb=self.__cfgOb, cachePath=self.__cachePath) - # - schemaName = self.__sD.getName() - modulePathMap = self.__cfgOb.get("DICT_METHOD_HELPER_MODULE_PATH_MAP", sectionName=sectionName) - dP = DictionaryApiProviderWrapper(self.__cachePath, cfgOb=self.__cfgOb, configName=sectionName, useCache=True) - dictApi = dP.getApiByName(schemaName) - rP = DictMethodResourceProvider( - self.__cfgOb, cachePath=self.__cachePath, - restoreUseStash=restoreUseStash, - restoreUseGit=restoreUseGit, - providerTypeExcludeL=providerTypeExcludeL, - ) - self.__dmh = DictMethodRunner(dictApi, modulePathMap=modulePathMap, resourceProvider=rP) - - def setWarning(self, action): - if action in ["error", "ignore", "default"]: - self.__warningAction = action - return True - else: - self.__warningAction = "default" - return False - - def setDelimiters(self, colSep=None, rowSep=None): - """Set column and row delimiters for intermediate data files used for - batch-file loading operations. - """ - self.__colSep = colSep if colSep is not None else "&##&\t" - self.__rowSep = rowSep if rowSep is not None else "$##$\n" - return True - - def load(self, inputPathList=None, containerList=None, loadType="batch-file", deleteOpt=None, tableIdSkipD=None): - """Load data for each table defined in the current schema definition object. - Data are extracted from the input file list. - - Data source options: - - inputPathList = [, ....] - - or - - containerList = [ data container, ...] 
- - - loadType = ['batch-file' | 'batch-insert'] - deleteOpt = 'selected' | 'all' - - tableIdSkipD - searchable container with tableIds to be skipped on loading - - - Loading is performed using the current database server connection. - - Intermediate data files for 'batch-file' loading are created in the current working path. - - Returns True for success or False otherwise. - - """ - tableIdSkipD = tableIdSkipD if tableIdSkipD is not None else {} - tableDataDict = {} - containerNameList = [] - if inputPathList is not None: - cL = self.__rpP.getContainerList(inputPathList) - # - # Apply dynamic methods here - - # - for cA in cL: - self.__dmh.apply(cA) - tableDataDict, containerNameList = self.__sdp.process(cL) - - elif containerList is not None: - tableDataDict, containerNameList = self.__sdp.process(containerList) - # - if loadType in ["batch-file", "batch-file-append"]: - append = True if loadType == "batch-file-append" else False - exportList = self.__exportTdd(tableDataDict, colSep=self.__colSep, rowSep=self.__rowSep, append=append) - for tableId, loadPath in exportList: - if tableId in tableIdSkipD: - continue - self.__batchFileImport(tableId, loadPath, sqlFilePath=None, containerNameList=containerNameList, deleteOpt=deleteOpt) - if self.__cleanUp: - self.__cleanUpFile(loadPath) - return True - elif loadType == "batch-insert": - for tableId, rowList in tableDataDict.items(): - if tableId in tableIdSkipD: - continue - if deleteOpt in ["all", "selected"] or rowList: - self.__batchInsertImport(tableId, rowList=rowList, containerNameList=containerNameList, deleteOpt=deleteOpt) - return True - else: - pass - - return False - - def __cleanUpFile(self, filePath): - try: - os.remove(filePath) - except Exception: - pass - - def makeLoadFilesMulti(self, dataList, procName, optionsD, workingDir): - """Create a loadable data file for each table defined in the current schema - definition object. Data is extracted from the input file list. 
- - Load files are creating in the current working path. - - Return the containerNames for the input path list, and path list for load files that are created. - - """ - _ = workingDir - try: - pn = procName.split("-")[-1] - except Exception: - pn = procName - - exportFormat = optionsD["exportFormat"] if "exportFormat" in optionsD else "tdd" - r1, r2 = self.makeLoadFiles(inputPathList=dataList, partName=pn, exportFormat=exportFormat) - return dataList, r1, r2, [] - - def makeLoadFiles(self, inputPathList, append=False, partName="1", exportFormat="tdd"): - """Create a loadable data file for each table defined in the current schema - definition object. Data is extracted from the input file list. - - Load files are created in the current working path. - - Return the containerNames for the input path list, and path list for load files that are created. - - """ - cL = self.__rpP.getContainerList(inputPathList) - for cA in cL: - self.__dmh.apply(cA) - tableDataDict, containerNameList = self.__sdp.process(cL) - if exportFormat == "tdd": - return containerNameList, self.__exportTdd(tableDataDict, colSep=self.__colSep, rowSep=self.__rowSep, append=append, partName=partName) - elif exportFormat == "csv": - return containerNameList, self.__exportCsv(tableDataDict, append=append, partName=partName) - else: - return [], [] - - def __exportCsv(self, tableDict, append=False, partName="1"): - """ """ - modeOpt = "a" if append else "w" - - exportList = [] - for tableId, rowList in tableDict.items(): - if not rowList: - continue - tObj = self.__sD.getSchemaObject(tableId) - schemaAttributeIdList = tObj.getAttributeIdList() - attributeNameList = tObj.getAttributeNameList() - # - fn = os.path.join(self.__workPath, tableId + "-" + partName + ".csv") - with open(fn, modeOpt, newline="", encoding="utf-8") as ofh: - csvWriter = csv.writer(ofh) - csvWriter.writerow(attributeNameList) - for rD in rowList: - csvWriter.writerow([rD[aId] for aId in schemaAttributeIdList]) - - 
exportList.append((tableId, fn)) - return exportList - - def __exportTdd(self, tableDict, colSep="&##&\t", rowSep="$##$\n", append=False, partName="1"): - modeOpt = "a" if append else "w" - - exportList = [] - for tableId, rowList in tableDict.items(): - tObj = self.__sD.getSchemaObject(tableId) - schemaAttributeIdList = tObj.getAttributeIdList() - # - if rowList: - fn = os.path.join(self.__workPath, tableId + "-" + partName + ".tdd") - ofh = open(fn, modeOpt, encoding="utf-8") - for rD in rowList: - # logger.info("%r" % colSep.join([str(rD[aId]) for aId in schemaAttributeIdList])) - ofh.write("%s%s" % (colSep.join([str(rD[aId]) for aId in schemaAttributeIdList]), rowSep)) - ofh.close() - exportList.append((tableId, fn)) - return exportList - - def loadBatchFiles(self, loadList=None, containerNameList=None, deleteOpt=None): - """Load data for each table defined in the current schema definition object using - - Data source options: - - loadList = [(tableId, 0 and row[id] != r'\N': - if row[tid] is not None and row[tid] != r"\N": - vList.append(row[tid]) - aList.append(nm) - sqlInsertList.append((sqlGen.insertTemplateSQL(databaseName, tableName, aList), vList)) - - ret = myQ.sqlBatchTemplateCommand(sqlInsertList, prependSqlList=sqlDeleteList) - if ret: - logger.debug("Batch insert completed for table %s rows %d\n", tableName, len(sqlInsertList)) - else: - logger.error("Batch insert fails for table %s length %d\n", tableName, len(sqlInsertList)) - - endTime = time.time() - if self.__verbose: - logger.debug("Completed at %s (%.3f seconds)\n", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime) - - return ret - - def __deleteFromTable(self, tableIdList, deleteValue): - """Delete data from the input table list where the schema table delete attribute - has the input value "deleteValue". 
- - """ - databaseName = self.__sD.getDatabaseName() - sqlList = [] - sqlGen = SqlGenAdmin(self.__verbose) - for tableId in tableIdList: - tableName = self.__sD.getSchemaName(tableId) - tableDefObj = self.__sD.getSchemaObject(tableId) - atName = tableDefObj.getDeleteAttributeName() - sqlTemp = sqlGen.deleteTemplateSQL(databaseName, tableName, [atName]) - sqlList.append(sqlTemp % deleteValue) - # - return sqlList diff --git a/rcsb/db/mysql/__init__.py b/rcsb/db/mysql/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rcsb/db/sql/QueryDirectives.py b/rcsb/db/sql/QueryDirectives.py deleted file mode 100644 index 6e7c9f59..00000000 --- a/rcsb/db/sql/QueryDirectives.py +++ /dev/null @@ -1,513 +0,0 @@ -## -# File: QueryDirectives.py -# Author: J. Westbrook -# Date: 19-Jun-2015 -# Version: 0.001 Initial version -# -# Updates: -# 21-Jun-2015 jdw extend order directives -# 22-Jun-2015 jdw add VALUE_LIST_CONDITION for selecting alternatives values - -# 04-Jul-2015 jdw add accessor for current attribute selection - -# 09-Aug-2015 jdw add __queryDirSub(self, inpQueryDirList, domD={}) -# 09-Aug-2015 jdw add support for multi-valued references - DOM_REF_# -## -""" -A collection of classes to generate SQL commands to perform queries and schema construction. - -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import logging - -from rcsb.db.sql.SqlGen import SqlGenCondition, SqlGenQuery - -logger = logging.getLogger(__name__) - - -class QueryDirectives(object): - - """Process query directives and generate SQL instructions. - - mini- SQL Query API token stream. - - Ordered selection list: - - SELECT_ITEM::ITEM: - SELECT_ITEM::ITEM:DOM_REF: - - tableId.columnId as defined in the supporting schema definition. 
- - Example: - - SELECT_ITEM:1:ITEM:DOM_REF:xtype - SELECT_ITEM:2:ITEM:DOM_REF:ytype - - Query conditions: - - Conditions for single values (ordered): - - VALUE_CONDITION::LOP::ITEM::COP::VALUE: - VALUE_CONDITION::LOP::ITEM::COP::VALUE:DOM_REF: - - < comparison operator > in ['EQ', 'NE', 'GE', 'GT', 'LT', 'LE', 'LIKE', 'NOT LIKE'] - - Examples: - - VALUE_CONDITION:1:LOP:AND:ITEM:pdbx_webselect.crystal_twin:COP:GT:VALUE:DOM_REF:twin - VALUE_CONDITION:2:LOP:AND:ITEM:pdbx_webselect.entry_type:COP:EQ:VALUE:DOM_REF:molecular_type - VALUE_CONDITION:3:LOP:AND:ITEM:pdbx_webselect.space_group_name_H_M:COP:EQ:VALUE:DOM_REF:spaceg - VALUE_CONDITION:4:LOP:AND:ITEM:pdbx_webselect.refinement_software:COP:LIKE:VALUE:DOM_REF:software - - VALUE_CONDITION:5:LOP:AND:ITEM:pdbx_webselect.date_of_RCSB_release:COP:GE:VALUE:DOM_REF:date1 - VALUE_CONDITION:6:LOP:AND:ITEM:pdbx_webselect.date_of_RCSB_release:COP:LE:VALUE:DOM_REF:date2 - - Conditions for multiple values (ordered): - - VALUE_LIST_CONDITION::LOP::ITEM::COP:: - VALUE_LOP::VALUE_LIST: - VALUE_LIST_CONDITION::LOP::ITEM::COP:: - VALUE_LOP::VALUE_LIST:DOM_REF: - - < comparison operator > in ['EQ', 'NE', 'GE', 'GT', 'LT', 'LE', 'LIKE', 'NOT LIKE'] - - Examples: - - VALUE_LIST_CONDITION:1:LOP:AND:ITEM:pdbx_webselect.entry_type:COP:EQ:VALUE_LOP:OR:VALUE_LIST:DOM_REF:molecular_type - - Value condition(s) with indirect reference - - VALUE_KEYED_CONDITION::LOP::CONDITION_LIST_ID::VALUE: - VALUE_KEYED_CONDITION::LOP::CONDITION_LIST_ID::VALUE:DOM_REF: - - Example: - - VALUE_KEYED_CONDITION:15:LOP:AND:CONDITION_LIST_ID:1:VALUE:DOM_REF:solution - - Value condition list: - - Key values from VALUE_KEY_CONDIION declared as a set of VALUE_CONDITIONS. This provides the - means to associate a more complex query condition with a single input key value. 
- - Example: - - CONDITION_LIST:1:KEY:mr:LOP:OR:ITEM:pdbx_webselect.method_to_determine_struct:COP:LIKE:VALUE:%MR% - CONDITION_LIST:1:KEY:mr:LOP:OR:ITEM:pdbx_webselect.method_to_determine_struct:COP:LIKE:VALUE:%MOLECULAR REPLACEMENT% - - CONDITION_LIST:1:KEY:sad:LOP:OR:ITEM:pdbx_webselect.method_to_determine_struct:COP:LIKE:VALUE:%SAD% - CONDITION_LIST:1:KEY:sad:LOP:OR:ITEM:pdbx_webselect.method_to_determine_struct:COP:LIKE:VALUE:%MAD% - - CONDITION_LIST:1:KEY:other:LOP:AND:ITEM:pdbx_webselect.method_to_determine_struct:COP:NOT LIKE:VALUE:%MR% - CONDITION_LIST:1:KEY:other:LOP:AND:ITEM:pdbx_webselect.method_to_determine_struct:COP:NOT LIKE:VALUE:%MOLECULAR REPLACEMENT% - CONDITION_LIST:1:KEY:other:LOP:AND:ITEM:pdbx_webselect.method_to_determine_struct:COP:NOT LIKE:VALUE:%SAD% - CONDITION_LIST:1:KEY:other:LOP:AND:ITEM:pdbx_webselect.method_to_determine_struct:COP:NOT LIKE:VALUE:%MAD% - - Join condition (ordered): - - JOIN_CONDITION::LOP::L_ITEM::COP::R_ITEM: - JOIN_CONDITION::LOP::L_ITEM:DOM_REF::COP::R_ITEM:DOM_REF: - - Example: - - JOIN_CONDITION:1:LOP:AND:L_ITEM:pdbx_database_related.structure_id:COP:EQ:R_ITEM:entry.id - - - Sort order list: - - ORDER_ITEM:1:ITEM::SORT_ORDER: - ORDER_ITEM:1:ITEM:DOM_REF::SORT_ORDER: - - Example: - - ORDER_ITEM:1:ITEM:DOM_REF:xtype - ORDER_ITEM:2:ITEM:DOM_REF:ytype - - - """ - - def __init__(self, schemaDefObj, verbose=False): - """Input: - - schemaDefObj = is instance of class derived from SchemaDefBase(). 
- - """ - self.__sD = schemaDefObj - self.__verbose = verbose - self.__debug = True - # - self.__selectTupList = [] - self.__orgSelectCount = 0 - - def build(self, queryDirL=None, domD=None, appendValueConditonsToSelect=False, queryDirSeparator=":", domRefSeparator="|"): - """Build SQL instructure from the input list of query directives and dictionary or dom references.""" - queryDirL = queryDirL if queryDirL else [] - domD = domD if domD else {} - if self.__verbose: - logger.debug("dom dictionary length domD %d\n", len(domD)) - tL = [] - qL = [] - self.__selectTupList = [] - # - # - for qD in queryDirL: - tL.extend(qD.split(queryDirSeparator)) - - # if self.__verbose: - # ("\n+%s.%s() tL %r\n" % (self.__class__.__name__, sys._getframe().f_code.co_name, tL)) - # - qL = self.__queryDirSub(inpQueryDirList=tL, domD=domD, domRefSeparator=domRefSeparator) - - if self.__debug: - logger.debug("length qL %d\n", len(qL)) - for qV in qL: - logger.debug("qL %r\n", qV) - # Now parse the token list -- - # - selectD, orderD, conditionD, self.__orgSelectCount = self.__parseTokenList(qL, appendValueConditonsToSelect) - sqlS = self.__sqlGen(selectD, orderD, conditionD) - return sqlS - - def getAttributeSelectList(self): - """Return the current list of [(tableId,attributeId),...] 
in query order -""" - return self.__selectTupList, self.__orgSelectCount - - def __getTokenD(self, tL, index, nPairs): - """Return a dictionary of token and value pairs in the input list starting at tL[index].""" - tD = {} - try: - i1 = index - i2 = index + nPairs * 2 - for i in range(i1, i2, 2): - tD[tL[i]] = tL[i + 1] - except Exception as e: - if self.__verbose: - logger.error("fails with index %d nPairs %d tL %r", index, nPairs, tL) - logger.exception("Failing with %s", str(e)) - - return tD - - # - # - - def __parseTokenList(self, qdL, appendValueConditonsToSelect=False): - """ - Parse input list of tokens and return dictionaries of instructions (selections, conditions, sorting order) - for input to the SQL query generator. - """ - try: - selectD = {} - conditionD = {} - keyCondD = {} - condListD = {} - orderD = {} - # - tD = {} - # - i = 0 - while i < len(qdL): - # Get selections - - # - if qdL[i] in ["SELECT_ITEM"]: - ordinal = int(str(qdL[i + 1])) - tD = self.__getTokenD(qdL, i + 2, 1) - if ("ITEM" in tD) and (tD["ITEM"] is not None): - tdotc = str(tD["ITEM"]).split(".") - # (tableId, attributeId) apply the upper case convention used in schema map - selectD[ordinal] = (tdotc[0].upper(), tdotc[1].upper()) - else: - if self.__verbose: - logger.debug("selection incomplete at i = %d\n", i) - for k, v in tD.items(): - logger.debug(" --- tD -- %r %r\n", k, v) - # raise ValueError("Selection definition incomplete") - i += 4 - continue - elif qdL[i] in ["VALUE_CONDITION"]: - ordinal = int(str(qdL[i + 1])) - tD = self.__getTokenD(qdL, i + 2, 4) - if ("VALUE" in tD) and (tD["VALUE"] is not None): - if "LOP" in tD and "ITEM" in tD and "COP" in tD: - tdotc = str(tD["ITEM"]).split(".") - tableId = tdotc[0].upper() - attributeId = tdotc[1].upper() - tObj = self.__sD.getSchemaObject(tableId) - aType = tObj.getAttributeType(attributeId) - cop = str(tD["COP"]).upper() - conditionD[ordinal] = {"cType": "value", "lOp": tD["LOP"], "cObj": ((tableId, attributeId), cop, 
(tD["VALUE"], aType))} - else: - raise ValueError("Value condition incomplete") - else: - pass - i += 10 - continue - elif qdL[i] in ["VALUE_LIST_CONDITION"]: - ordinal = int(str(qdL[i + 1])) - tD = self.__getTokenD(qdL, i + 2, 5) - if ("VALUE_LIST" in tD) and (tD["VALUE_LIST"] is not None): - if "LOP" in tD and "ITEM" in tD and "COP" in tD and "VALUE_LOP" in tD: - tdotc = str(tD["ITEM"]).split(".") - tableId = tdotc[0].upper() - attributeId = tdotc[1].upper() - tObj = self.__sD.getSchemaObject(tableId) - aType = tObj.getAttributeType(attributeId) - cop = str(tD["COP"]).upper() - vLop = str(tD["VALUE_LOP"]).upper() - if isinstance(tD["VALUE_LIST"], list): - vL = tD["VALUE_LIST"] - else: - vL = [tD["VALUE_LIST"]] - conditionD[ordinal] = {"cType": "value_list", "lOp": tD["LOP"], "cObj": ((tableId, attributeId), cop, vLop, (vL, aType))} - else: - raise ValueError("Value list condition incomplete") - else: - pass - i += 12 - continue - elif qdL[i] in ["JOIN_CONDITION"]: - ordinal = int(str(qdL[i + 1])) - tD = self.__getTokenD(qdL, i + 2, 4) - if "LOP" in tD and "L_ITEM" in tD and "COP" in tD and "R_ITEM" in tD: - ltdotc = str(tD["L_ITEM"]).split(".") - ltableId = ltdotc[0].upper() - lattributeId = ltdotc[1].upper() - rtdotc = str(tD["R_ITEM"]).split(".") - rtableId = rtdotc[0].upper() - rattributeId = rtdotc[1].upper() - cop = str(tD["COP"]).upper() - conditionD[ordinal] = {"cType": "join", "lOp": tD["LOP"], "cObj": ((ltableId, lattributeId), cop, (rtableId, rattributeId))} - else: - raise ValueError("Join condition incomplete") - i += 10 - continue - elif qdL[i] in ["CONDITION_LIST"]: - # example: CONDITION_LIST:1:KEY:mr:LOP:OR:ITEM:pdbx_webselect.solution:COP:LIKE:VALUE:%MR% - ordinal = int(str(qdL[i + 1])) - tD = self.__getTokenD(qdL, i + 2, 5) - if ("VALUE" in tD) and (tD["VALUE"] is not None): - if "LOP" in tD and "ITEM" in tD and "COP" in tD and "KEY" in tD: - tdotc = str(tD["ITEM"]).split(".") - tableId = tdotc[0].upper() - attributeId = tdotc[1].upper() - tObj 
= self.__sD.getSchemaObject(tableId) - aType = tObj.getAttributeType(attributeId) - cop = str(tD["COP"]).upper() - ky = str(tD["KEY"]) - # ('PDB_ENTRY_TMP', 'PDB_ID'), 'LIKE', ('x-ray', 'char'), 'AND') - if ordinal not in condListD: - condListD[ordinal] = {} - if ky not in condListD[ordinal]: - condListD[ordinal][ky] = [] - condListD[ordinal][ky].append((tD["LOP"], (tableId, attributeId), cop, (tD["VALUE"], aType))) - else: - raise ValueError("Value condition incomplete") - else: - pass - - i += 12 - continue - elif qdL[i] in ["VALUE_KEYED_CONDITION"]: - # example: "VALUE_KEYED_CONDITION:15:LOP:AND:CONDITION_LIST_ID:1:VALUE:DOM_REF:solution" - ordinal = int(str(qdL[i + 1])) - tD = self.__getTokenD(qdL, i + 2, 3) - if ("VALUE" in tD) and (tD["VALUE"] is not None): - if "LOP" in tD and "CONDITION_LIST_ID" in tD: - keyCondD[ordinal] = (int(str(tD["CONDITION_LIST_ID"])), tD["VALUE"], tD["LOP"]) - else: - raise ValueError("Value key condition incomplete") - else: - pass - i += 8 - continue - elif qdL[i] in ["ORDER_ITEM"]: - ordinal = int(str(qdL[i + 1])) - tD = self.__getTokenD(qdL, i + 2, 2) - if ("ITEM" in tD) and ("SORT_ORDER" in tD) and (tD["ITEM"] is not None): - tdotc = str(tD["ITEM"]).split(".") - # (tableId, attributeId) apply the upper case convention used in schema map - if tD["SORT_ORDER"] in ["ASC", "ASCENDING", "INCREASING"]: - sf = "ASC" - elif tD["SORT_ORDER"] in ["DESC", "DESCENDING", "DECREASING"]: - sf = "DESC" - else: - sf = "ASC" - - orderD[ordinal] = ((tdotc[0].upper(), tdotc[1].upper()), sf) - else: - if self.__verbose: - logger.debug("orderby incomplete at i = %d", i) - for k, v in tD.items(): - logger.debug(" --- tD -- %r %r", k, v) - # raise ValueError("Order definition incomplete") - i += 6 - continue - else: - pass - except Exception as e: - if self.__verbose: - logger.error("fails at i = %d", i) - for k, v in tD.items(): - logger.error(" --- tD -- %r %r\n", k, v) - logger.exception("Failing with %s", str(e)) - - # - # Create condition groups 
by expanding key-value condition definition using the supporting condition list info in condListD ... - # - for ordinal, keyCond in keyCondD.items(): - condListId, keyValue, lOp = keyCond - conditionD[ordinal] = {"cType": "group", "lOp": lOp, "cObj": []} - if condListId in condListD: - logger.debug("++++condListId %r keyValue %r lOp %r\n", condListId, keyValue, lOp) - if keyValue in condListD[condListId]: - for cond in condListD[condListId][keyValue]: - logger.debug("+++++++condListId %r keyValue %r lOp %r cond %r", condListId, keyValue, lOp, cond) - # example : ('OR', ('PDBX_WEBSELECT', 'METHOD_TO_DETERMINE_STRUCT'), 'LIKE', ('MOLECULAR REPLACEMENT', 'char') - # using condListD[ordinal][ky].append((tD['LOP'], (tableId, attributeId), cop, (tD['VALUE'], aType))) - conditionD[ordinal]["cObj"].append(cond) - # - if self.__verbose: - for k, v in selectD.items(): - logger.debug("select %r %r", k, v) - for k, v in orderD.items(): - logger.debug("order %r %r", k, v) - # - for k, v in keyCondD.items(): - logger.debug("keycondD %r %r", k, v) - for k1, vD in condListD.items(): - for k2, v in vD.items(): - logger.debug("condListD %r %r %r", k1, k2, v) - # - for k1, vD in conditionD.items(): - for k2, v in vD.items(): - logger.debug("ordinal %3d type %r: %r", k1, k2, v) - - # - orgSelectCount = len(selectD) - if appendValueConditonsToSelect: - vSelectL = [] - for k in sorted(conditionD.keys()): - tD = conditionD[k] - if tD["cType"] in ["value", "value_list"]: - vSelectL.append(tD["cObj"][0]) - elif tD["cType"] in ["group"]: - cL = tD["cObj"] - for cV in cL: - vSelectL.append(cV[1]) - nxtOrd = max(selectD.keys()) + 1 - for vSelect in vSelectL: - selectD[nxtOrd] = vSelect - nxtOrd += 1 - - # - return selectD, orderD, conditionD, orgSelectCount - - def __sqlGen(self, selectD, orderD, conditionD): - # - sqlGen = SqlGenQuery(schemaDefObj=self.__sD, verbose=self.__verbose) - - sTableIdList = [] - # for sTup in sList: - for k in sorted(selectD.keys()): - sTup = selectD[k] - 
sqlGen.addSelectAttributeId(attributeTuple=(sTup[0], sTup[1])) - sTableIdList.append(sTup[0]) - self.__selectTupList.append(sTup) - - sqlCondition = SqlGenCondition(schemaDefObj=self.__sD, verbose=self.__verbose) - if conditionD: - for k in sorted(conditionD.keys()): - cD = conditionD[k] - cObj = cD["cObj"] - lOp = cD["lOp"] - if cD["cType"] in ["value"]: - sqlCondition.addValueCondition(lhsTuple=cObj[0], opCode=cObj[1], rhsTuple=cObj[2], preOp=lOp) - elif cD["cType"] in ["join"]: - sqlCondition.addJoinCondition(lhsTuple=cObj[0], opCode=cObj[1], rhsTuple=cObj[2], preOp=lOp) - elif cD["cType"] in ["group"]: - sqlCondition.addGroupValueConditionList(cD["cObj"], preOp=lOp) - elif cD["cType"] in ["value_list"]: - # build cDefList = [(lPreOp,lhsTuple, opCode, rhsTuple), ...] from value_list - - # cObj = ((tableId, attributeId), cop, vLop, (tD['VALUE_LIST'], aType))} - # - vL = cObj[3][0] - # - vType = cObj[3][1] - vOp = cObj[2] - lhsTuple = cObj[0] - cOp = cObj[1] - cDefList = [] - for v in vL: - cDefList.append((vOp, lhsTuple, cOp, (v, vType))) - sqlCondition.addGroupValueConditionList(cDefList, preOp=lOp) - else: - pass - - sqlCondition.addTables(sTableIdList) - sqlGen.setCondition(sqlCondition) - - for k in sorted(orderD.keys()): - oTup, sf = orderD[k] - sqlGen.addOrderByAttributeId(attributeTuple=oTup, sortFlag=sf) - # - sqlS = sqlGen.getSql() - if self.__verbose: - logger.debug("sql:\n%s", sqlS) - sqlGen.clear() - # - return sqlS - - def __queryDirSub(self, inpQueryDirList, domD=None, domRefSeparator="|"): - """Substitute DOM references into the input query directive list - - - Substitions: - DOM_REF -> domD[DOM_REF value] - DOM_REF_# -> str(domD[DOM_REF value]).split(domRefSeparator)[#] (# = 0,1,2,...) - - Note -- DOM_REF_1 DOM_REF_2 allows a single dom element name to carry - multiple correlated values as in a "select" (e.g. 
dom-ref -> myselect = "value1|value2") - - """ - domD = domD if domD else {} - qL = [] - try: - i = 0 - while i < len(inpQueryDirList): - tV = inpQueryDirList[i] - if tV.upper().startswith("DOM_REF_"): - indx = int(str(tV.upper()).split("_")[2]) - if inpQueryDirList[i + 1] in domD and domD[inpQueryDirList[i + 1]] is not None and domD[inpQueryDirList[i + 1]]: - if isinstance(domD[inpQueryDirList[i + 1]], list) and (len(domD[inpQueryDirList[i + 1]]) > 1): - tV = [str(tt).split(domRefSeparator)[indx] for tt in domD[inpQueryDirList[i + 1]]] - elif isinstance(domD[inpQueryDirList[i + 1]], list) and (len(domD[inpQueryDirList[i + 1]]) == 1): - tV = str(domD[inpQueryDirList[i + 1]][0]).split(domRefSeparator)[indx] - else: - tV = str(domD[inpQueryDirList[i + 1]]).split(domRefSeparator)[indx] - else: - tV = "" - qL.append(tV if tV else None) - i += 1 - elif tV.upper() in ["DOM_REF"]: - if (inpQueryDirList[i + 1] in domD) and (domD[inpQueryDirList[i + 1]] is not None) and domD[inpQueryDirList[i + 1]]: - if isinstance(domD[inpQueryDirList[i + 1]], list) and (len(domD[inpQueryDirList[i + 1]]) > 1): - tV = domD[inpQueryDirList[i + 1]] - elif isinstance(domD[inpQueryDirList[i + 1]], list) and (len(domD[inpQueryDirList[i + 1]]) == 1): - tV = domD[inpQueryDirList[i + 1]][0] - else: - tV = domD[inpQueryDirList[i + 1]] - else: - tV = "" - qL.append(tV if tV else None) - i += 1 - else: - qL.append(tV) - i += 1 - except Exception as e: - if self.__verbose: - logger.error("fails at i = %d", i) - for ii, qd in enumerate(inpQueryDirList): - logger.error(" --- qd %4d %r\n", ii, qd) - logger.exception("Failing with %s", str(e)) - - return qL diff --git a/rcsb/db/sql/SqlGen.py b/rcsb/db/sql/SqlGen.py deleted file mode 100644 index 9962bb67..00000000 --- a/rcsb/db/sql/SqlGen.py +++ /dev/null @@ -1,1018 +0,0 @@ -## -# File: MyDdSqlGen.py -# Author: J. 
Westbrook -# Date: 31-Jan-2012 -# Version: 0.001 Initial version -# -# Updates: -# 27-Jan-2012 Jdw Refactored from MyDbUtil to isolate portable SQL generators. -# 1-Feb-2012 Jdw Add export/import methods -# 11-Apr-2014 jdw add template methods with attribute Id inputs. -# 25-May-2015 jdw complete the coding of the contraint generator class -# 28-May-2015 jdw adjust terminology in api method names and internal vars - -# 16-Jun-2015 jdw generalized the addition of a condition group addGroupValueConditionList -# 2-Oct-2017 jdw py3 compatibility use zip_longest -# 2-Oct-2017 jdw fix obvious error with missing len() in addKeyAttributeEquiJoinConditions() -# 30-Dec-2017 jdw add crate specific SQL generators -# 5-Jan-2018 jdw add default replication factor for crate - -# 20-Jun-2018 jdw adjustments for dynamic schema generation -# 7-Jul-2018 jdw update for new schema def prototypes -# 6-Feb-2023 dwp fix pylint issues -# -# -## -""" -A collection of classes to generate SQL commands to perform queries and schema construction. - -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - -# pylint: disable=too-many-lines - -import copy -import itertools -import logging - -# from operator import itemgetter, attrgetter -try: - from itertools import zip_longest # pylint: disable=ungrouped-imports -except Exception: - from itertools import izip_longest as zip_longest # pylint: disable=ungrouped-imports - -logger = logging.getLogger(__name__) -# - - -class SqlGenAdmin(object): - - """Builds SQL commands to create table schema from a schema definition derived from class SchemaDefBase. 
- - Note: - """ - - def __init__(self, verbose=False, serverType="mysql"): - - self.__verbose = verbose - self.__serverType = str(serverType).lower() - - def truncateTableSQL(self, databaseName, tableName): - """Return the SQL string require to truncate (remove all rows) from the input table.""" - return "TRUNCATE TABLE %s.%s; " % (databaseName, tableName) - - def idUpdateTemplateSQL(self, databaseName, tableDefObj, updateAttributeIdList=None, conditionAttributeIdList=None): - """Return the SQL string template for updating the input attributes into the named table subject - to the constraining attributes. - - The string provides formatting placeholders for updated values as well as for constraining values. - - The input table object is used to adjust the value quoting in the returned template string. - - """ - updateAttributeIdList = updateAttributeIdList if updateAttributeIdList else [] - conditionAttributeIdList = conditionAttributeIdList if conditionAttributeIdList else [] - tableName = tableDefObj.getName() - fL = [] - for atId in updateAttributeIdList: - if tableDefObj.isAttributeStringType(atId): - fL.append(" %s=" % tableDefObj.getAttributeName(atId) + "%s") - elif tableDefObj.isAttributeFloatType(atId): - fL.append(" %s=" % tableDefObj.getAttributeName(atId) + "%s") - elif tableDefObj.isAttributeIntegerType(atId): - fL.append(" %s=" % tableDefObj.getAttributeName(atId) + "%s") - else: - fL.append(" %s=" % tableDefObj.getAttributeName(atId) + "%s") - # - if conditionAttributeIdList: - cL = [] - for atId in conditionAttributeIdList: - if tableDefObj.isAttributeStringType(atId): - cL.append(" %s=" % tableDefObj.getAttributeName(atId) + "%s") - else: - cL.append(" %s=" % tableDefObj.getAttributeName(atId) + "%s") - tS = "UPDATE %s.%s SET %s WHERE (%s);" % (databaseName, tableName, ",".join(fL), ",".join(cL)) - else: - tS = "UPDATE %s.%s SET %s;" % (databaseName, tableName, ",".join(fL)) - # - return tS - - def idInsertTemplateSQL(self, databaseName, 
tableDefObj, insertAttributeIdList=None): - """Return the SQL string template for inserting the input attributes into the named table. - - The string provides formatting placeholders for updated values as well as for constraining values. - - The input table object is used to adjust the value quoting in the returned template string. - - """ - insertAttributeIdList = insertAttributeIdList if insertAttributeIdList else [] - tableName = tableDefObj.getName() - attributeNameList = [] - # - fL = [] - for atId in insertAttributeIdList: - attributeNameList.append(tableDefObj.getAttributeName(atId)) - if tableDefObj.isAttributeStringType(atId): - fL.append("%s") - elif tableDefObj.isAttributeFloatType(atId): - fL.append("%s") - elif tableDefObj.isAttributeIntegerType(atId): - fL.append("%s") - else: - fL.append("%s") - # - # logger.debug(" %r %r %r %r" % (databaseName, tableName, attributeNameList, fL)) - tS = "INSERT INTO %s.%s (%s) VALUES (%s);" % (databaseName, tableName, ",".join(attributeNameList), ",".join(fL)) - # - return tS - - def idDeleteTemplateSQL(self, databaseName, tableDefObj, conditionAttributeIdList=None): - """Return the SQL string template for deleting records in the named table subject - to the constraining attributes. - - The string provides formatting placeholders for constraining values. - - The input table object is used to adjust the value quoting in the returned template string. 
- - """ - conditionAttributeIdList = conditionAttributeIdList if conditionAttributeIdList else [] - tableName = tableDefObj.getName() - # - if conditionAttributeIdList: - cL = [] - for atId in conditionAttributeIdList: - if tableDefObj.isAttributeStringType(atId): - cL.append(" %s=" % tableDefObj.getAttributeName(atId) + "%s") - else: - cL.append(" %s=" % tableDefObj.getAttributeName(atId) + "%s") - - tS = "DELETE FROM %s.%s WHERE (%s);" % (databaseName, tableName, " AND ".join(cL)) - else: - tS = "DELETE FROM %s.%s;" % (databaseName, tableName) - # - return tS - - def insertTemplateSQL(self, databaseName, tableName, attributeNameList=None): - """Return the SQL string template for inserting the input attributes into the named table. - - The string provides formatting placeholders for inserted values that are added when - the SQL command is executed. - - """ - attributeNameList = attributeNameList if attributeNameList else [] - fL = [] - for _ in attributeNameList: - fL.append("%s") - # - tS = "INSERT INTO %s.%s (%s) VALUES (%s);" % (databaseName, tableName, ",".join(attributeNameList), ",".join(fL)) - return tS - - def insertTemplateSQLCrate(self, databaseName, tableName, attributeNameList=None): - """Return the SQL string template for inserting the input attributes into the named table. - - The string provides formatting placeholders for inserted values that are added when - the SQL command is executed. - - cursor.executemany("INSERT INTO locations (name, date, kind, position) VALUES (?, ?, ?, ?)", - ... [('Cloverleaf', '2007-03-11', 'Quasar', 7), - ... 
('Old Faithful', '2007-03-11', 'Quasar', 7)]) - [{u'rowcount': 1}, {u'rowcount': 1}] - """ - attributeNameList = attributeNameList if attributeNameList else [] - fL = [] - for _ in attributeNameList: - fL.append("?") - # - tS = "INSERT INTO %s.%s (%s) VALUES (%s);" % (databaseName, tableName, ",".join(attributeNameList), ",".join(fL)) - return tS - - def refreshTableSQLCrate(self, databaseName, tableName): - """Return the SQL string to refresh named table.""" - # - tS = "REFRESH TABLE %s.%s;" % (databaseName, tableName) - return tS - - def deleteTemplateSQL(self, databaseName, tableName, attributeNameList=None): - """Return the SQL string template for deleting table records constrained by the input attributes. - - The string provides formatting placeholders for the constraining values. - delete from where at1=%s and at2=%s - - """ - attributeNameList = attributeNameList if attributeNameList else [] - fL = [] - for v in attributeNameList: - fL.append(" %s=" % v + "%s") - # - tS = "DELETE FROM %s.%s WHERE %s;" % (databaseName, tableName, " AND ".join(fL)) - return tS - - def deleteFromListSQL(self, databaseName, tableName, attributeName, valueList, chunkSize=10): - """Return the SQL string for deleting table records for a list of string values of - the input attribute. - - delete from . where attributeName IN (v1,v2,v3); - - """ - sqlList = [] - chunkLists = self.__makeSubLists(chunkSize, valueList) - for chunk in chunkLists: - fL = ["'%s'" % v for v in chunk] - sqlList.append("DELETE FROM %s.%s WHERE %s IN (%s); " % (databaseName, tableName, attributeName, ",".join(fL))) - - return sqlList - - def __makeSubLists(self, num, iterable): - args = [iter(iterable)] * num - return ([e for e in t if e is not None] for t in zip_longest(*args)) - - def createDatabaseSQL(self, databaseName): - """Return a list of strings containing the SQL to drop and recreate the input database. 
- - DROP DATABASE IF EXISTS ; - CREATE DATABASE ; - """ - oL = [] - oL.append("DROP DATABASE IF EXISTS %s;" % databaseName) - oL.append("CREATE DATABASE %s;" % databaseName) - return oL - - def removeDatabaseSQL(self, databaseName): - """Return a list of strings containing the SQL to drop and recreate the input database. - - DROP DATABASE IF EXISTS ; - CREATE DATABASE ; - """ - oL = [] - oL.append("DROP DATABASE IF EXISTS %s;" % databaseName) - return oL - - def createTableSQL(self, databaseName, tableDefObj): - """Return a list of strings containing the SQL commands to create the table and indices - described by the input table definition. - - """ - oL = [] - if self.__serverType == "cratedb": - oL.extend(self.__dropTableCrate(databaseName, tableDefObj.getName())) - oL.extend(self.__createTableCrate(databaseName, tableDefObj)) - # oL.extend(self.__createTableIndices(tableDefObj)) - elif self.__serverType == "cockroachdb": - oL.extend(self.__setDatabase(databaseName)) - oL.extend(self.__dropTable(tableDefObj.getName())) - oL.extend(self.__createTableCockroach(tableDefObj)) - oL.extend(self.__createTableIndices(tableDefObj)) - else: - oL.extend(self.__setDatabase(databaseName)) - oL.extend(self.__dropTable(tableDefObj.getName())) - oL.extend(self.__createTable(tableDefObj)) - oL.extend(self.__createTableIndices(tableDefObj)) - return oL - - def dropTableSQL(self, databaseName, tableDefObj): - oL = [] - if self.__serverType == "cratedb": - oL.extend(self.__dropTableCrate(databaseName, tableDefObj.getName())) - else: - oL.extend(self.__setDatabase(databaseName)) - oL.extend(self.__dropTable(tableDefObj.getName())) - return oL - - def __setDatabase(self, databaseName): - """Return a list of strings containing database connection SQL command for the input database - - USE ; - """ - return ["USE %s;" % databaseName] - - def __dropTable(self, tableName): - """Return a list of strings containing the SQL DROP TABLE command for the input table: - - DROP TABLE IF EXISTS ; - 
""" - return ["DROP TABLE IF EXISTS %s;" % tableName] - - def __createTable(self, tableDefObj): - """Return a list of strings containing the SQL command to create the table described in - input table schema definition object. - - """ - oL = [] - pkL = [] - # - attributeIdList = tableDefObj.getAttributeIdList() - # - oL.append("CREATE TABLE %s (" % tableDefObj.getName()) - for attributeId in attributeIdList: - # - name = tableDefObj.getAttributeName(attributeId) - - sqlType = str(tableDefObj.getAttributeType(attributeId)).upper() - width = int(tableDefObj.getAttributeWidth(attributeId)) - # - precision = int(tableDefObj.getAttributePrecision(attributeId)) - notNull = "not null" if not tableDefObj.getAttributeNullable(attributeId) else " null default null" - if tableDefObj.getAttributeIsPrimaryKey(attributeId): - pkL.append(name) - # - if (sqlType == "CHAR") or (sqlType == "VARCHAR"): - sW = "%-s(%d)" % (sqlType, width) - tS = "%-40s %-16s %s" % (name, sW, notNull) - elif sqlType.startswith("INT") or sqlType in ["INTEGER", "BIGINT", "SMALLINT"]: - tS = "%-40s %-16s %s" % (name, sqlType, notNull) - elif sqlType in ["FLOAT", "REAL", "DOUBLE PRECISION"]: - tS = "%-40s %-16s %s" % (name, sqlType, notNull) - elif (sqlType == "DATE") or (sqlType == "DATETIME"): - tS = "%-40s %-16s %s" % (name, sqlType, notNull) - elif (sqlType == "TEXT") or (sqlType == "MEDIUMTEXT") or (sqlType == "LONGTEXT"): - tS = "%-40s %-16s %s" % (name, sqlType, notNull) - elif (sqlType == "DECIMAL") or (sqlType == "NUMERIC"): - sW = "%-s(%d,%d)" % (sqlType, width, precision) - tS = "%-40s %-16s %s" % (name, sW, notNull) - else: - tS = None - # - # if ii < len(attributeIdList) -1: - # oL.append(tS+",") - # else: - # oL.append(tS) - if tS: - oL.append(tS + ",") - - if pkL: - oL.append("PRIMARY KEY (%s)" % (",".join(pkL))) - - if str(tableDefObj.getType()).upper() == "TRANSACTIONAL": - oL.append(") ENGINE InnoDB;") - else: - oL.append(") ENGINE MyISAM;") - # - # return this as list containing a single 
string command. - return ["\n".join(oL)] - - def __createTableCockroach(self, tableDefObj, addEngine=False): - """Return a list of strings containing the SQL command to create the table described in - input table schema definition object. - - """ - - oL = [] - pkL = [] - # - attributeIdList = tableDefObj.getAttributeIdList() - # - oL.append("CREATE TABLE %s (" % tableDefObj.getName()) - for attributeId in attributeIdList: - # - name = tableDefObj.getAttributeName(attributeId) - - sqlType = str(tableDefObj.getAttributeType(attributeId)).upper() - width = int(tableDefObj.getAttributeWidth(attributeId)) - precision = int(tableDefObj.getAttributePrecision(attributeId)) - notNull = "not null" if not tableDefObj.getAttributeNullable(attributeId) else " null default null" - if tableDefObj.getAttributeIsPrimaryKey(attributeId): - pkL.append(name) - # - if (sqlType == "CHAR") or (sqlType == "VARCHAR"): - sW = "%-s(%d)" % (sqlType, width) - tS = "%-40s %-16s %s" % (name, sW, notNull) - elif sqlType.startswith("INT") or sqlType in ["INTEGER", "BIGINT", "SMALLINT"]: - tS = "%-40s %-16s %s" % (name, sqlType, notNull) - elif sqlType in ["FLOAT", "REAL", "DOUBLE PRECISION"]: - tS = "%-40s %-16s %s" % (name, sqlType, notNull) - elif sqlType == "DATE": - tS = "%-40s %-16s %s" % (name, sqlType, notNull) - elif sqlType == "DATETIME": - tS = "%-40s %-16s %s" % (name, "timestamp", notNull) - elif (sqlType == "TEXT") or (sqlType == "MEDIUMTEXT") or (sqlType == "LONGTEXT"): - tS = "%-40s %-16s %s" % (name, "text", notNull) - elif (sqlType == "DECIMAL") or (sqlType == "NUMERIC"): - sW = "%-s(%d,%d)" % (sqlType, width, precision) - tS = "%-40s %-16s %s" % (name, sW, notNull) - else: - tS = None - # - # if ii < len(attributeIdList) -1: - # oL.append(tS+",") - # else: - # oL.append(tS) - if tS: - oL.append(tS + ",") - - if pkL: - oL.append("PRIMARY KEY (%s)" % (",".join(pkL))) - - if addEngine: - if str(tableDefObj.getType()).upper() == "TRANSACTIONAL": - oL.append(") ENGINE InnoDB;") - 
else: - oL.append(") ENGINE MyISAM;") - else: - oL.append(");") - # - # return this as list containing a single string command. - return ["\n".join(oL)] - - def __dropTableCrate(self, databaseName, tableName): - """Return a list of strings containing the SQL DROP TABLE command for the input table: - - DROP TABLE IF EXISTS ; - """ - return ["DROP TABLE IF EXISTS %s.%s;" % (databaseName, tableName)] - - def __filterColumnName(self, name): - rName = str(name).lower() - if rName[0].isdigit(): - rName = "the_" + rName - elif rName in ["offset"]: - rName = "the_" + rName - return rName - - def __createTableCrate(self, databaseName, tableDefObj): - """Return a list of strings containing the SQL command to create the table described in - input table schema definition object. CrateDb variant - - """ - oL = [] - pkL = [] - # - attributeIdList = tableDefObj.getAttributeIdList() - # - tableName = str(tableDefObj.getName()).lower() - oL.append("CREATE TABLE %s.%s (" % (databaseName, tableName)) - for attributeId in attributeIdList: - # - name = self.__filterColumnName(tableDefObj.getAttributeName(attributeId)) - - sqlType = str(tableDefObj.getAttributeType(attributeId)).upper() - # width = int(tableDefObj.getAttributeWidth(attributeId)) - # precision = int(tableDefObj.getAttributePrecision(attributeId)) - notNull = "not null" if not tableDefObj.getAttributeNullable(attributeId) else " " - - if tableDefObj.getAttributeIsPrimaryKey(attributeId): - pkL.append(name) - # - if (sqlType == "CHAR") or (sqlType == "VARCHAR"): - # sW = "%-s(%d)" % ("string", width) - tS = "%-40s %-16s %s" % (name, "string", notNull) - elif sqlType.startswith("INT") or sqlType in ["INTEGER", "BIGINT", "SMALLINT"]: - tS = "%-40s %-16s %s" % (name, "long", notNull) - elif sqlType in ["FLOAT", "REAL", "DOUBLE PRECISION"]: - tS = "%-40s %-16s %s" % (name, "float", notNull) - elif (sqlType == "DATE") or (sqlType == "DATETIME"): - tS = "%-40s %-16s %s" % (name, "string", notNull) - elif (sqlType == "TEXT") or 
(sqlType == "MEDIUMTEXT") or (sqlType == "LONGTEXT"): - tS = "%-40s %-16s %s" % (name, "string", notNull) - elif (sqlType == "DECIMAL") or (sqlType == "NUMERIC"): - tS = "%-40s %-16s %s" % (name, "float", notNull) - # sW = "%-s(%d,%d)" % (sqlType, width, precision) - # tS = "%-40s %-16s %s" % (name, sW, notNull) - else: - tS = None - # - if tS: - oL.append(tS + ",") - - tL = self.__createTableIndicesCrate(tableDefObj) - for tV in tL: - oL.append(tV) - if pkL: - oL.append("PRIMARY KEY (%s)" % (",".join(pkL))) - - oL.append(") WITH (number_of_replicas='1-all');") - - # return this as list containing a single string command. - return ["\n".join(oL)] - - def __createTableIndices(self, tableDefObj): - """Return a list of strings containing the SQL command to create any indices described in - input table schema definition object. - - """ - oL = [] - tableName = tableDefObj.getName() - indexNameList = tableDefObj.getIndexNames() - for indexName in indexNameList: - indexType = tableDefObj.getIndexType(indexName) - if str(indexType).upper() == "SEARCH": - indexType = "" - tL = [] - tL.append("CREATE %s INDEX %s on %s (" % (indexType, indexName, tableName)) - attributeIdList = tableDefObj.getIndexAttributeIdList(indexName) - for ii, attributeId in enumerate(attributeIdList): - name = tableDefObj.getAttributeName(attributeId) - tS = "%-s" % name - if ii < len(attributeIdList) - 1: - tL.append(tS + ",") - else: - tL.append(tS) - tL.append(");") - oL.append(" ".join(tL)) - - return oL - - def __createTableIndicesCrate(self, tableDefObj): - """Return a list of strings containing the SQL command to create any indices described in - input table schema definition object. 
- with (analyzer = 'english') - """ - oL = [] - # tableName = tableDefObj.getName() - indexNameList = tableDefObj.getIndexNames() - for indexName in indexNameList: - # indexType = tableDefObj.getIndexType(indexName) - tL = [] - tL.append("INDEX %s USING FULLTEXT (" % (indexName)) - attributeIdList = tableDefObj.getIndexAttributeIdList(indexName) - attributeIdListS = [] - for ii, attributeId in enumerate(attributeIdList): - sqlType = str(tableDefObj.getAttributeType(attributeId)).upper() - if sqlType in ["VARCHAR", "CHAR", "TEXT", "DATE", "DATETIME", "MEDIUMTEXT", "LONGTEXT"]: - attributeIdListS.append(attributeId) - # - for ii, attributeId in enumerate(attributeIdListS): - - name = self.__filterColumnName(tableDefObj.getAttributeName(attributeId)) - tS = "%-s" % name - if ii < len(attributeIdListS) - 1: - tL.append(tS + ",") - else: - tL.append(tS) - # tL.append(") with (analyzer = 'english'),") - tL.append(") ,") - oL.append(" ".join(tL)) - - return oL - - def exportTable(self, databaseName, tableDefObj, exportPath, withDoubleQuotes=False): - """ """ - tableName = tableDefObj.getName() - aNames = tableDefObj.getAttributeNameList() - # - oL = [] - oL.append("SELECT %s " % ",".join(aNames)) - oL.append(" INTO OUTFILE %s " % exportPath) - oL.append("FIELDS TERMINATED BY '&##&\\t' ") - if withDoubleQuotes: - oL.append(" OPTIONALLY ENCLOSED BY '\"' ") - oL.append("LINES TERMINATED BY '$##$\\n' ") - oL.append("FROM %s.%s " % (databaseName, tableName)) - oL.append(";") - return "\n".join(oL) - - def importTable(self, databaseName, tableDefObj, importPath, withTruncate=False, withDoubleQuotes=False): - """Create the SQL commands to import data files stored in charactore delimited data files into the - in put database and table. Input data may be optionally enclosed in double quotes. - - An options is provied to pre-truncate the table before loading. - - Return: a string containing the SQL for the load command. 
- """ - tableName = tableDefObj.getName() - aNames = tableDefObj.getAttributeNameList() - # - oL = [] - if withTruncate: - oL.append("TRUNCATE TABLE %s.%s; " % (databaseName, tableName)) - - # oL.append("SET @@GLOBAL.local_infile = 1; ") - oL.append("LOAD DATA LOCAL INFILE '%s' " % importPath) - oL.append("INTO TABLE %s.%s " % (databaseName, tableName)) - oL.append("FIELDS TERMINATED BY '&##&\\t' ") - - if withDoubleQuotes: - oL.append("OPTIONALLY ENCLOSED BY '\"' ") - - oL.append("LINES TERMINATED BY '$##$\\n' ") - oL.append(" (%s) " % ",".join(aNames)) - oL.append(";") - return " ".join(oL) - - -class SqlGenQuery(object): - - """Builds an the SQL command string for a selection query.""" - - def __init__(self, schemaDefObj, verbose=False): - """Input: - - schemaDef is instance of class derived from SchemaDefBase(). - """ - self.__schemaDefObj = schemaDefObj - self.__verbose = verbose - # - self.__databaseName = self.__schemaDefObj.getDatabaseName() - self.__selectList = [] - self.__orderList = [] - self.__conditionObj = None - self.__sortOrder = "DESC" - self.__limitStart = None - self.__limitLength = None - # - - def setDatabase(self, databaseName): - self.__databaseName = databaseName - - def clear(self): - self.__databaseName = self.__schemaDefObj.getDatabaseName() - self.__selectList = [] - self.__orderList = [] - self.__conditionObj = None - self.__sortOrder = "DESC" - self.__limitStart = None - self.__limitLength = None - - def addSelectLimit(self, rowStart=None, rowLength=None): - try: - self.__limitStart = int(rowStart) - self.__limitLength = int(rowLength) - return True - except Exception: - return False - - def addSelectAttributeId(self, attributeTuple=(None, None)): - """Add the input attribute to the current attribute select list. 
- - where attributeTuple contains (tableId,attributeId) - - """ - self.__selectList.append(attributeTuple) - return True - - def setOrderBySortOrder(self, direction="ASC"): - """The default sort order applied to attributes in the ORDER BY clause. (ASC|DESC)""" - self.__sortOrder = direction - - def addOrderByAttributeId(self, attributeTuple=(None, None), sortFlag="DEFAULT"): - """Add the input attribute to the current orderBy list. - - where attributeTuple contains (tableId,attributeId) - - """ - sf = self.__sortOrder if sortFlag == "DEFAULT" else sortFlag - self.__orderList.append((attributeTuple, sf)) - return True - - def setCondition(self, conditionObj): - """Set an instance of the condition object from the SqlGenCondition() class.""" - self.__conditionObj = conditionObj - return True - - def getSql(self): - """ """ - return self.__makeSql() - - def __makeSql(self): - """Builds SQL string for the query from the current list of attributes, list of - ORDER BY attributes and the constrainObj. - """ - # - if not self.__selectList: - return None - # - # Attribute names from select list - - # - aNames = [self.__schemaDefObj.getQualifiedAttributeName(aTup) for aTup in self.__selectList] - # - # Table Id's from the select list - - # - tIds = [aTup[0] for aTup in self.__selectList] - # - conditionSql = None - if self.__conditionObj is not None: - conditionSql = self.__conditionObj.getSql() - tIds.extend(self.__conditionObj.getTableIdList()) - # - oNames = [] - if self.__orderList: - oNames = [self.__schemaDefObj.getQualifiedAttributeName(aTup) + " " + sortFlag for aTup, sortFlag in self.__orderList] - # - tIds = list(set(tIds)) - tNames = [self.__databaseName + "." 
+ self.__schemaDefObj.getSchemaName(tId) for tId in tIds] - # - oL = [] - oL.append("SELECT %s " % ",".join(aNames)) - oL.append(" FROM %s " % ",".join(tNames)) - if conditionSql is not None and conditionSql: - oL.append(" WHERE %s " % conditionSql) - # - if oNames: - oL.append(" ORDER BY %s " % (",".join(oNames))) - - if (self.__limitStart is not None) and (self.__limitLength is not None): - oL.append(" LIMIT %d, %d " % (self.__limitStart, self.__limitLength)) - - oL.append(";") - # - # - return "\n".join(oL) - - -class SqlGenCondition(object): - - """Builds the Condition portion of an SQL selection or related query.""" - - def __init__(self, schemaDefObj, addKeyJoinFlag=True, verbose=False): - """Input: - - schemaDef is instance of class derived from SchemaDefBase(). - """ - self.__schemaDefObj = schemaDefObj - self.__verbose = verbose - self.__ops = ["EQ", "NE", "GE", "GT", "LT", "LE", "LIKE", "NOT LIKE", "IS", "IS NOT"] - self.__opDict = {"EQ": "=", "NE": "!=", "GE": ">=", "GT": ">", "LT": "<", "LE": "<=", "LIKE": "LIKE", "NOT LIKE": "NOT LIKE", "IS": "IS", "IS NOT": "IS NOT"} - self.__logOps = ["AND", "OR", "NOT"] - self.__grpOps = ["BEGIN", "END"] - # - self.__cList = [] - self.__tableIdList = [] - self.__numConditions = 0 - self.__addKeyJoinFlag = addKeyJoinFlag - # - - def clear(self): - self.__cList = [] - self.__tableIdList = [] - self.__numConditions = 0 - return True - - def set(self, conditionDefList=None): - """Set/reset the current condition list --- The input is used verbatim and unmodified.""" - if conditionDefList is not None: - self.__cList = conditionDefList - self.__tableIdList = [] - for cV in self.__cList: - self.__updateTableList(cV) - return True - else: - return False - - def __updateTableList(self, cObj): - """Add the tables included in the input condition to the internal table list.""" - cType = cObj[0] - if cType in ["VALUE_CONDITION", "VALUE_LIST_CONDITION"]: - cType, lhsTuple, _, rhsTuple = cObj - lTableId, _ = lhsTuple - 
self.__addTable(lTableId) - return True - elif cType in ["JOIN_CONDITION"]: - cType, lhsTuple, _, rhsTuple = cObj - lTableId, _ = lhsTuple - rTableId, _ = rhsTuple - self.__addTable(lTableId) - self.__addTable(rTableId) - return True - else: - return False - - def __addTable(self, tableId): - if tableId not in self.__tableIdList: - self.__tableIdList.append(tableId) - return True - else: - return False - - def get(self): - return self.__cList - - def getSql(self): - if self.__addKeyJoinFlag: - self.addKeyAttributeEquiJoinConditions() - return self.__makeSql() - - def getTableIdList(self): - return self.__tableIdList - - def addTables(self, tableIdList): - """Add the tables from the input tableIdList to the internal list of tables. - - The internal list of tables is used to materialize join contraints between - all tables based on primary keys defined in the schema definition. - """ - for tableId in tableIdList: - self.__addTable(tableId) - return True - - def addValueCondition(self, lhsTuple=None, opCode=None, rhsTuple=None, preOp="AND"): - """Adds a condition to the current contraint list - - - lhsTuple = (TableId,AttributeId) - opCode = one of the operations defined in self.__opDict.keys() - rhsTuple = (value,type) where for - - simple values - - - (simpleValue,'CHAR|OTHER') - preOp = logical operator preceding this contraint in the current contraint list. - - """ - cObj = ("VALUE_CONDITION", lhsTuple, opCode, rhsTuple) - if cObj not in self.__cList: - self.__updateTableList(cObj) - if preOp in ["AND", "OR"]: - self.addLogicalOp(lOp=preOp) - self.addBeginGroup() - self.__cList.append(cObj) - self.addEndGroup() - self.__numConditions += 1 - return self.__numConditions - - def addGroupValueConditionList(self, cDefList, preOp="AND"): - """Add a value alternative condition to the current contraint list - using the input list of value condition definitions defined as - - - cDefList = [(lPreOp,lhsTuple, opCode, rhsTuple), ...] 
- lPreOp = local logical conjunction used to add condition within the group (leading value is ignored) - lhsTuple = (TableId,AttributeId) - opCode = one of the operations defined in self.__opDict.keys() - rhsTuple = (value,type) where for - - simple values are defined as - - - (simpleValue,'CHAR|') < CHAR > types are quoted - - preOp is the logical conjuction used to add the group condition to current condition list. - - """ - if not cDefList: - return self.__numConditions - if preOp in ["AND", "OR"]: - self.addLogicalOp(lOp=preOp) - # - self.addBeginGroup() - for ii, cDef in enumerate(cDefList): - (lPreOp, lhsTuple, opCode, rhsTuple) = cDef - cObj = ("VALUE_CONDITION", lhsTuple, opCode, rhsTuple) - self.__updateTableList(cObj) - if ii > 0: - self.addLogicalOp(lOp=lPreOp) - self.addBeginGroup() - self.__cList.append(cObj) - self.addEndGroup() - - self.__numConditions += 1 - - self.addEndGroup() - return self.__numConditions - - def addJoinCondition(self, lhsTuple=None, opCode=None, rhsTuple=None, preOp="AND"): - """Adds a join condition to the current contraint list - - - lhsTuple = (TableId,AttributeId) - opCode = one of the operations defined in self.__opDict.keys() - rhsTuple = (TableId,AttributeId) - - For join conditions rhsTuple values implemented as - - - (tableId,attributeId) - - rhsTuple type is currrently only implemented as 'ATTRIBUTE' target but - but could be extended to support other targets for different operators. - - preOp = logical operator preceding this contraint in the current contraint list. - - """ - cObj = ("JOIN_CONDITION", lhsTuple, opCode, rhsTuple) - if cObj not in self.__cList: - self.__updateTableList(cObj) - if preOp in ["AND", "OR"]: - self.addLogicalOp(lOp=preOp) - self.addBeginGroup() - self.__cList.append(cObj) - self.addEndGroup() - self.__numConditions += 1 - return self.__numConditions - - def addLogicalOp(self, lOp): - """Adds a logical operation into the current condition list. 
- - lOp = one of 'AND','OR', 'NOT' - """ - self.__cList.append(("LOG_OP", lOp)) - - def addBeginGroup(self): - """Inserts the beginning of a parenthetical group in the current condition list.""" - self.__cList.append(("GROUPING", "BEGIN")) - - def addEndGroup(self): - """Inserts the ending of a parenthetical group in the current condition list.""" - self.__cList.append(("GROUPING", "END")) - - def addKeyAttributeEquiJoinConditions(self): - """Auto add equi-join contraints between tables in the current table list -""" - # self.__KeyJoinDone = True - if len(self.__tableIdList) < 2: - return 0 - cList = copy.deepcopy(self.__cList) - self.__cList = [] - tablePairList = [t for t in itertools.combinations(self.__tableIdList, 2)] - for (t1, t2) in tablePairList: - self.__addInterTableJoinContraints(t1, t2) - for cV in cList: - if cV[0] in ["JOIN_CONDITION"] and cV in self.__cList: - continue - self.__cList.append(cV) - - return len(tablePairList) - - def __makeSql(self): - """Builds SQL string for the query condition encoded in the input contraint command list. 
- - The condition command list is a sequence of tuples with the following syntax: - - ('VALUE_CONDITION', (tableId,attributeId), 'EQ'|'NE'|GE'|'GT'|'LT'|'LE'|'LIKE'|'IS'|'IS NOT', (Value, 'CHAR'|'OTHER')) - - ('VALUE_LIST_CONDITION', (tableId,attributeId), 'IN'|'NOT IN', (valueList, 'CHAR'|'INT'|'FLOAT')) - - ('JOIN_CONDITION', (tableId,attributeId), 'EQ'|'NE'|'GE'|'GT'|'LT'|'LE'|'LIKE', (tableId,attributeId) ) - - ('LOG_OP', 'AND'|'OR'|'NOT') -> conjunction / negation - - ('GROUPING', 'BEGIN'|'END') -> (grouping/parenthetical control) - - """ - # - cSqlL = [] - # - # - cCount = 0 - for cV in self.__cList: - cType = (cV[0]).upper() - if cType == "VALUE_CONDITION": - cCount += 1 - (_, lhsTuple, opId, rhsTuple) = cV - # (tableId, attributeId) = lhsTuple - (value, vType) = rhsTuple - vType = vType.upper() - qAttributeName = self.__schemaDefObj.getQualifiedAttributeName(tableAttributeTuple=lhsTuple) - # jdw quote date and datetime - if vType in ["CHAR", "VARCHAR", "DATE", "DATETIME"]: - cSqlL.append(" %s %s '%s' " % (qAttributeName, self.__opDict[opId], value)) - else: - cSqlL.append(" %s %s %s " % (qAttributeName, self.__opDict[opId], value)) - - elif cType == "VALUE_LIST_CONDITION": - cCount += 1 - (_, lhsTuple, opId, rhsTuple) = cV - # (tableId, attributeId) = lhsTuple - (valueList, vType) = rhsTuple - vType = vType.upper() - qAttributeName = self.__schemaDefObj.getQualifiedAttributeName(tableAttributeTuple=lhsTuple) - # - qVL = [] - for value in valueList: - if vType in ["CHAR", "VARCHAR"]: - qV = "'%s'" % value - elif vType in ["INT", "INTEGER"]: - qV = "%i" % value - elif vType in ["FLOAT", "DOUBLE", "DECIMAL"]: - qV = "%f" % value - else: - qV = value - qVL.append(qV) - vS = ",".join - cSqlL.append(" %s %s [ %s ] " % (qAttributeName, self.__opDict[opId], vS)) - - elif cType == "JOIN_CONDITION": - cCount += 1 - (_, lhsTuple, opId, rhsTuple) = cV - # (lTableId, lAttributeId) = lhsTuple - # (rTableId, rAttributeId) = rhsTuple - lAttributeName = 
self.__schemaDefObj.getQualifiedAttributeName(tableAttributeTuple=lhsTuple) - rAttributeName = self.__schemaDefObj.getQualifiedAttributeName(tableAttributeTuple=rhsTuple) - # - cSqlL.append(" %s %s %s " % (lAttributeName, self.__opDict[opId], rAttributeName)) - - elif cType == "LOG_OP": - (_, logOp) = cV - if logOp not in ["NOT"] and cCount > 0: - cSqlL.append(" %s " % logOp) - elif cType == "GROUPING": - (_, group) = cV - if group == "BEGIN": - cSqlL.append(" ( ") - elif group == "END": - cSqlL.append(" ) ") - else: - pass - - return "\n".join(cSqlL) - - def __addInterTableJoinContraints(self, lTableId, rTableId): - """The ...""" - lTdef = self.__schemaDefObj.getSchemaObject(lTableId) - lKeyAttributeIdL = lTdef.getPrimaryKeyAttributeIdList() - rTdef = self.__schemaDefObj.getSchemaObject(rTableId) - rKeyAttributeIdL = rTdef.getPrimaryKeyAttributeIdList() - # - commonAttributeIdSet = set(lKeyAttributeIdL) & set(rKeyAttributeIdL) - logger.debug("+SqlGenCondition.__addInterTableJoinConditions lTable %s rTable %s common keys %r\n", lTableId, rTableId, commonAttributeIdSet) - # - for attributeId in commonAttributeIdSet: - lhsTuple = (lTableId, attributeId) - opCode = "EQ" - rhsTuple = (rTableId, attributeId) - self.addJoinCondition(lhsTuple=lhsTuple, opCode=opCode, rhsTuple=rhsTuple, preOp="AND") - # - return len(commonAttributeIdSet) diff --git a/rcsb/db/sql/__init__.py b/rcsb/db/sql/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rcsb/db/tests-cockroach/ConnectionTests.py b/rcsb/db/tests-cockroach/ConnectionTests.py deleted file mode 100644 index 7b008350..00000000 --- a/rcsb/db/tests-cockroach/ConnectionTests.py +++ /dev/null @@ -1,78 +0,0 @@ -## -# File: ConnectionTests.py -# Author: J. Westbrook -# Date: 12-Mar-2018 -# Version: 0.001 -# -# Updates: -# 27-Mar-2018 jdw inject configuration for configuration object rather than environment -## -""" -Test cases opening database connections. 
- -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import logging -import os -import time -import unittest - -from rcsb.db.cockroach.Connection import Connection -from rcsb.utils.config.ConfigUtil import ConfigUtil - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - -HERE = os.path.abspath(os.path.dirname(__file__)) -TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) - - -class ConnectionBaseTests(unittest.TestCase): - def setUp(self): - configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml") - configName = "site_info_configuration" - self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName) - self.__resourceName = "COCKROACH_DB" - - self.__startTime = time.time() - logger.debug("Starting at %s", time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - - def tearDown(self): - endTime = time.time() - logger.debug("Completed at %s (%.4f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime) - - def testCreateConnection(self): - """Test case - connection creation""" - try: - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - self.assertNotEqual(client, None) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testCreateMultipleConnections(self): - """Test case - multiple connection creation""" - try: - for _ in range(25): - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - self.assertNotEqual(client, None) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - -def suiteOpen(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(ConnectionBaseTests("testCreateConnection")) - 
suiteSelect.addTest(ConnectionBaseTests("testCreateMultipleConnections")) - return suiteSelect - - -if __name__ == "__main__": - mySuite = suiteOpen() - unittest.TextTestRunner(verbosity=2).run(mySuite) diff --git a/rcsb/db/tests-cockroach/SchemaDefLoaderCockroachDbMultiTests.py b/rcsb/db/tests-cockroach/SchemaDefLoaderCockroachDbMultiTests.py deleted file mode 100644 index c28636e2..00000000 --- a/rcsb/db/tests-cockroach/SchemaDefLoaderCockroachDbMultiTests.py +++ /dev/null @@ -1,282 +0,0 @@ -## -# File: SchemaDefLoaderCockroachDbMultiTests.py -# Author: J. Westbrook -# Date: 10-Feb-2018 -# Version: 0.001 -# -# Updates: -# 2-Apr-2018 jdw update for refactored api's an utils -# -## -""" -Tests for creating and loading distributed rdbms database using PDBx/mmCIF data files -and external schema definitions using CockroachDb services - Covers BIRD, CCD and PDBx/mmCIF -model files - Multiprocessor mode tests - -The following test settings from the configuration file be used will a fallback to localhost/26257. 
- - COCKROACH_DB_USER_NAME - [COCKROACH_DB_PW] - COCKROACH_DB_NAME - COCKROACH_DB_HOST - -See also the load length limit for each file type for testing - Set to None to remove - - - self.__fileLimit = 100 - -""" - -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import logging -import os -import time -import unittest - -from rcsb.db.cockroach.CockroachDbLoader import CockroachDbLoader -from rcsb.db.cockroach.CockroachDbUtil import CockroachDbQuery -from rcsb.db.cockroach.Connection import Connection - -# -from rcsb.db.sql.SqlGen import SqlGenAdmin -from rcsb.utils.repository.RepositoryProvider import RepositoryProvider -from rcsb.db.utils.SchemaProvider import SchemaProvider -from rcsb.utils.config.ConfigUtil import ConfigUtil -from rcsb.utils.multiproc.MultiProcUtil import MultiProcUtil - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - -try: - from mmcif.io.IoAdapterCore import IoAdapterCore as IoAdapter -except Exception: - from mmcif.io.IoAdapterPy import IoAdapterPy as IoAdapter - - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - -HERE = os.path.abspath(os.path.dirname(__file__)) -TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) - - -class SchemaDefLoaderCockroachDbMultiTests(unittest.TestCase): - def __init__(self, methodName="runTest"): - super(SchemaDefLoaderCockroachDbMultiTests, self).__init__(methodName) - self.__verbose = True - self.__createFlag = True - - def setUp(self): - self.__verbose = True - self.__numProc = 2 - self.__fileLimit = 100 - self.__chunkSize = 0 - self.__workPath = os.path.join(HERE, "test-output") - self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") - configPath = os.path.join(TOPDIR, "rcsb", "db", "config", 
"exdb-config-example.yml") - configName = "site_info_configuration" - self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName) - self.__resourceName = "COCKROACH_DB" - self.__schP = SchemaProvider(self.__cfgOb, self.__workPath, useCache=True) - self.__rpP = RepositoryProvider(cfgOb=self.__cfgOb, numProc=self.__numProc, fileLimit=self.__fileLimit, cachePath=self.__workPath) - # - self.__tableIdSkipD = {"ATOM_SITE": True, "ATOM_SITE_ANISOTROP": True} - # - self.__startTime = time.time() - logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - - def tearDown(self): - endTime = time.time() - logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime) - - def testConnection(self): - try: - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - self.assertNotEqual(client, None) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testSchemaCreate(self): - """Create table schema (live) for BIRD, chemical component, and PDBx data.""" - try: - sd, _, _, _ = self.__schP.getSchemaInfo("bird") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("chem_comp") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("pdbx") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testSchemaRemove(self): - """Remove table schema (live) for BIRD, chemical component, and PDBx data.""" - try: - sd, _, _, _ = self.__schP.getSchemaInfo("bird") - ret = self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("chem_comp") - ret = self.__schemaRemove(schemaDefObj=sd) - 
self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("pdbx") - ret = self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadChemCompMulti(self): - self.__testLoadFilesMulti("chem_comp") - - def testLoadBirdMulti(self): - self.__testLoadFilesMulti("bird") - - def testLoadPdbxMulti(self): - self.__testLoadFilesMulti("pdbx") - - def __getPathList(self, fType): - pathList = [] - if fType == "chem_comp": - pathList = self.__rpP.getLocatorObjList("chem_comp") - elif fType == "bird": - pathList = self.__rpP.getLocatorObjList("bird") - pathList.extend(self.__rpP.getLocatorObjList("bird_family")) - elif fType == "pdbx": - pathList = self.__rpP.getLocatorObjList("pdbx") - return pathList - - def loadInsertMany(self, dataList, procName, optionsD, workingDir): - - try: - _ = workingDir - ret = None - sd = optionsD["sd"] - skipD = optionsD["skip"] - ioObj = IoAdapter(verbose=self.__verbose) - logger.debug("%s pathlist %r", procName, dataList) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CockroachDbLoader(schemaDefObj=sd, ioObj=ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=dataList, loadType="cockroach-insert-many", deleteOpt="selected", tableIdSkipD=skipD) - # all or nothing here - if ret: - return dataList, dataList, [] - else: - return [], [], [] - except Exception as e: - logger.info("Failing with dataList %r", dataList) - logger.exception("Failing with %s", str(e)) - - return [], [], [] - - def __testLoadFilesMulti(self, contentType): - """Test case - create load w/insert-many all chemical component definition data files - (multiproc test)""" - numProc = self.__numProc - chunkSize = self.__chunkSize - try: - # - sd, _, _, _ = self.__schP.getSchemaInfo(contentType) - if self.__createFlag: - 
self.__schemaCreate(schemaDefObj=sd) - - optD = {} - optD["sd"] = sd - if contentType == "pdbx": - optD["skip"] = self.__tableIdSkipD - else: - optD["skip"] = {} - - # - pathList = self.__getPathList(fType=contentType) - logger.debug("Input path list %r", pathList) - mpu = MultiProcUtil(verbose=True) - mpu.setOptions(optionsD=optD) - mpu.set(workerObj=self, workerMethod="loadInsertMany") - ok, _, _, _ = mpu.runMulti(dataList=pathList, numProc=numProc, numResults=1, chunkSize=chunkSize) - self.assertEqual(ok, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def __schemaCreate(self, schemaDefObj): - """Test case - create table schema using schema definition""" - ret = 0 - try: - tableIdList = schemaDefObj.getTableIdList() - sqlGen = SqlGenAdmin(self.__verbose, serverType="CockroachDb") - dbName = schemaDefObj.getVersionedDatabaseName() - sqlL = sqlGen.createDatabaseSQL(dbName) - for tableId in tableIdList: - tableDefObj = schemaDefObj.getTable(tableId) - sqlL.extend(sqlGen.createTableSQL(databaseName=schemaDefObj.getVersionedDatabaseName(), tableDefObj=tableDefObj)) - - logger.debug("\nSchema creation SQL string\n %s\n\n", "\n".join(sqlL)) - logger.info("Creating schema using database %s", schemaDefObj.getVersionedDatabaseName()) - # - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - crQ = CockroachDbQuery(dbcon=client, verbose=self.__verbose) - ret = crQ.sqlCommandList(sqlCommandList=sqlL) - # ret = crQ.sqlCommand(' '.join(sqlL)) - logger.info("Schema create command returns %r\n", ret) - return ret - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def __schemaRemove(self, schemaDefObj): - """Test case - remove table schema using schema definition""" - ret = 0 - try: - dbName = schemaDefObj.getVersionedDatabaseName() - sqlGen = SqlGenAdmin(self.__verbose, serverType="CockroachDb") - sqlL = sqlGen.removeDatabaseSQL(dbName) - logger.debug("Schema 
Remove SQL string\n %s", "\n".join(sqlL)) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - crQ = CockroachDbQuery(dbcon=client, verbose=self.__verbose) - ret = crQ.sqlCommandList(sqlCommandList=sqlL) - # ret = crQ.sqlCommand(' '.join(sqlL)) - logger.debug("Schema remove command returns %r\n", ret) - return ret - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - -def baseSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoaderCockroachDbMultiTests("testConnection")) - suiteSelect.addTest(SchemaDefLoaderCockroachDbMultiTests("testSchemaCreate")) - suiteSelect.addTest(SchemaDefLoaderCockroachDbMultiTests("testSchemaRemove")) - return suiteSelect - - -def loadSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoaderCockroachDbMultiTests("testConnection")) - suiteSelect.addTest(SchemaDefLoaderCockroachDbMultiTests("testLoadChemCompMulti")) - suiteSelect.addTest(SchemaDefLoaderCockroachDbMultiTests("testLoadBirdMulti")) - suiteSelect.addTest(SchemaDefLoaderCockroachDbMultiTests("testLoadPdbxMulti")) - return suiteSelect - - -if __name__ == "__main__": - mySuite = baseSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - - mySuite = loadSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) diff --git a/rcsb/db/tests-cockroach/SchemaDefLoaderCockroachDbTests.py b/rcsb/db/tests-cockroach/SchemaDefLoaderCockroachDbTests.py deleted file mode 100644 index f2efd4f9..00000000 --- a/rcsb/db/tests-cockroach/SchemaDefLoaderCockroachDbTests.py +++ /dev/null @@ -1,354 +0,0 @@ -## -# File: CockroachDbLoaderCockroachDbTests.py -# Author: J. 
Westbrook -# Date: 10-Feb-2018 -# Version: 0.001 -# -# Updates: -# -# All s### 2-Apr-2018 jdw update for refactored api's an utils -## -""" -Tests for creating and loading distributed rdbms database using PDBx/mmCIF data files -and external schema definitions using CockroachDb services - Covers BIRD, CCD and PDBx/mmCIF -model files - - -The following test settings from the configuration will be used will a fallback to localhost/26257. - - COCKROACH_DB_USER_NAME - [COCKROACH_DB_PW] - COCKROACH_DB_NAME - COCKROACH_DB_HOST - -See also the load length limit for each file type for testing - Set to None to remove - - - self.__fileLimit = 100 - -""" - -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import logging -import os -import time -import unittest - -from rcsb.db.cockroach.CockroachDbLoader import CockroachDbLoader -from rcsb.db.cockroach.CockroachDbUtil import CockroachDbQuery -from rcsb.db.cockroach.Connection import Connection - -# -from rcsb.db.sql.SqlGen import SqlGenAdmin -from rcsb.utils.repository.RepositoryProvider import RepositoryProvider -from rcsb.db.utils.SchemaProvider import SchemaProvider -from rcsb.utils.config.ConfigUtil import ConfigUtil - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - - -try: - from mmcif.io.IoAdapterCore import IoAdapterCore as IoAdapter -except Exception: - from mmcif.io.IoAdapterPy import IoAdapterPy as IoAdapter - - -logging.basicConfig(level=logging.DEBUG, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - -HERE = os.path.abspath(os.path.dirname(__file__)) -TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) - - -class CockroachDbLoaderCockroachDbTests(unittest.TestCase): - def __init__(self, methodName="runTest"): - super(CockroachDbLoaderCockroachDbTests, 
self).__init__(methodName) - self.__verbose = True - self.__createFlag = False - - def setUp(self): - self.__verbose = True - self.__numProc = 2 - self.__fileLimit = 100 - self.__workPath = os.path.join(HERE, "test-output") - self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") - configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml") - configName = "site_info_configuration" - self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName) - self.__resourceName = "COCKROACH_DB" - self.__schP = SchemaProvider(self.__cfgOb, self.__workPath, useCache=True) - self.__rpP = RepositoryProvider(cfgOb=self.__cfgOb, numProc=self.__numProc, fileLimit=self.__fileLimit, cachePath=self.__workPath) - # - self.__tableIdSkipD = {"ATOM_SITE": True, "ATOM_SITE_ANISOTROP": True} - self.__ioObj = IoAdapter(verbose=self.__verbose) - # - self.__startTime = time.time() - logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - - def tearDown(self): - endTime = time.time() - logger.debug("Completed %s at %s (%.4f seconds)\n", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime) - - def testConnection(self): - try: - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - self.assertNotEqual(client, None) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testSchemaCreate(self): - """Create table schema (live) for BIRD, chemical component, and PDBx data.""" - try: - sd, _, _, _ = self.__schP.getSchemaInfo("bird") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("chem_comp") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("pdbx") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - except Exception as e: - 
logger.exception("Failing with %s", str(e)) - self.fail() - - def testSchemaRemove(self): - """Remove table schema (live) for BIRD, chemical component, and PDBx data.""" - try: - sd, _, _, _ = self.__schP.getSchemaInfo("bird") - ret = self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("chem_comp") - ret = self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("pdbx") - ret = self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertBirdReference(self): - - try: - - sd, _, _, _ = self.__schP.getSchemaInfo("bird") - if self.__createFlag: - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList("bird") - inputPathList.extend(self.__rpP.getLocatorObjList("bird_family")) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CockroachDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="cockroach-insert", deleteOpt="selected") - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertManyBirdReference(self): - try: - sd, _, _, _ = self.__schP.getSchemaInfo("bird") - if self.__createFlag: - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList("bird") - inputPathList.extend(self.__rpP.getLocatorObjList("bird_family")) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CockroachDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, 
loadType="cockroach-insert-many", deleteOpt="selected") - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertChemCompReference(self): - - try: - sd, _, _, _ = self.__schP.getSchemaInfo("chem_comp") - if self.__createFlag: - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList("chem_comp") - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CockroachDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="cockroach-insert", deleteOpt="selected") - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertManyChemCompReference(self): - - try: - sd, _, _, _ = self.__schP.getSchemaInfo("chem_comp") - if self.__createFlag: - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList("chem_comp") - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CockroachDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="cockroach-insert-many", deleteOpt="selected") - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertPdbxExampleFiles(self): - try: - sd, _, _, _ = self.__schP.getSchemaInfo("pdbx") - if self.__createFlag: - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList("pdbx") - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CockroachDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, 
warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="cockroach-insert", deleteOpt="selected", tableIdSkipD=self.__tableIdSkipD) - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertManyPdbxExampleFiles(self): - try: - sd, _, _, _ = self.__schP.getSchemaInfo("pdbx") - if self.__createFlag: - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList("pdbx") - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CockroachDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="cockroach-insert-many", deleteOpt="selected", tableIdSkipD=self.__tableIdSkipD) - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def __schemaCreateSQL(self, schemaDefObj): - """Test case - create table schema using schema definition""" - sqlL = [] - try: - tableIdList = schemaDefObj.getTableIdList() - sqlGen = SqlGenAdmin(self.__verbose, serverType="CockroachDb") - dbName = schemaDefObj.getVersionedDatabaseName() - sqlL = sqlGen.createDatabaseSQL(dbName) - for tableId in tableIdList: - tableDefObj = schemaDefObj.getTable(tableId) - sqlL.extend(sqlGen.createTableSQL(databaseName=schemaDefObj.getVersionedDatabaseName(), tableDefObj=tableDefObj)) - logger.debug("\nSchema creation SQL string\n %s\n\n", "\n".join(sqlL)) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - return sqlL - - def __schemaCreate(self, schemaDefObj): - """Test case - create table schema using schema definition""" - ret = 0 - try: - tableIdList = schemaDefObj.getTableIdList() - sqlGen = SqlGenAdmin(self.__verbose, serverType="CockroachDb") - dbName = schemaDefObj.getVersionedDatabaseName() - 
sqlL = sqlGen.createDatabaseSQL(dbName) - for tableId in tableIdList: - tableDefObj = schemaDefObj.getTable(tableId) - sqlL.extend(sqlGen.createTableSQL(databaseName=schemaDefObj.getVersionedDatabaseName(), tableDefObj=tableDefObj)) - - logger.debug("\nSchema creation SQL string\n %s\n\n", "\n".join(sqlL)) - logger.info("Creating schema using database %s", schemaDefObj.getVersionedDatabaseName()) - # - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - crQ = CockroachDbQuery(dbcon=client, verbose=self.__verbose) - ret = crQ.sqlCommandList(sqlCommandList=sqlL) - # ret = crQ.sqlCommand(' '.join(sqlL)) - logger.info("Schema create command returns %r\n", ret) - return ret - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def __schemaRemove(self, schemaDefObj): - """Test case - remove table schema using schema definition""" - ret = 0 - try: - dbName = schemaDefObj.getVersionedDatabaseName() - sqlGen = SqlGenAdmin(self.__verbose, serverType="CockroachDb") - sqlL = sqlGen.removeDatabaseSQL(dbName) - logger.debug("Schema Remove SQL string\n %s", "\n".join(sqlL)) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - crQ = CockroachDbQuery(dbcon=client, verbose=self.__verbose) - ret = crQ.sqlCommandList(sqlCommandList=sqlL) - # ret = crQ.sqlCommand(' '.join(sqlL)) - logger.debug("Schema remove command returns %r\n", ret) - return ret - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - -def createConnectionSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(CockroachDbLoaderCockroachDbTests("testConnection")) - # suiteSelect.addTest(CockroachDbLoaderCockroachDbTests("testClusterConnections")) - return suiteSelect - - -def removeSchemaSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(CockroachDbLoaderCockroachDbTests("testSchemaRemove")) - return suiteSelect - - -def createSchemaSuite(): - 
suiteSelect = unittest.TestSuite() - suiteSelect.addTest(CockroachDbLoaderCockroachDbTests("testSchemaCreate")) - return suiteSelect - - -def loadBirdReferenceSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(CockroachDbLoaderCockroachDbTests("testLoadInsertBirdReference")) - suiteSelect.addTest(CockroachDbLoaderCockroachDbTests("testLoadInsertManyBirdReference")) - return suiteSelect - - -def loadCCReferenceSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(CockroachDbLoaderCockroachDbTests("testLoadInsertChemCompReference")) - suiteSelect.addTest(CockroachDbLoaderCockroachDbTests("testLoadInsertManyChemCompReference")) - return suiteSelect - - -def loadPdbxSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(CockroachDbLoaderCockroachDbTests("testLoadInsertPdbxExampleFiles")) - suiteSelect.addTest(CockroachDbLoaderCockroachDbTests("testLoadInsertManyPdbxExampleFiles")) - return suiteSelect - - -if __name__ == "__main__": - # - mySuite = createConnectionSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - - mySuite = removeSchemaSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - - mySuite = createSchemaSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - - mySuite = loadBirdReferenceSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - - mySuite = loadCCReferenceSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - - mySuite = loadPdbxSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) -# diff --git a/rcsb/db/tests-cockroach/__init__.py b/rcsb/db/tests-cockroach/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rcsb/db/tests-crate/testSchemaDefLoaderCrateDb.py b/rcsb/db/tests-crate/testSchemaDefLoaderCrateDb.py deleted file mode 100644 index 9ed43984..00000000 --- a/rcsb/db/tests-crate/testSchemaDefLoaderCrateDb.py +++ /dev/null @@ -1,325 +0,0 @@ -## -# File: SchemaDefLoaderCrateDbTests.py -# Author: J. 
Westbrook -# Date: 21-Dec-2017 -# Version: 0.001 -# -# Updates: -# -# 2-Apr-2018 jdw -## -""" -Tests for creating and loading distributed rdbms database using PDBx/mmCIF data files -and external schema definitions using CrateDb services - Covers BIRD, CCD and PDBx/mmCIF -model files - - -The following test settings from the enviroment will be used will a fallback to localhost/4200. - - CRATE_DB_HOST - CRATE_DB_HOST - -See also the load length limit for each file type for testing - Set to None to remove - - - self.__fileLimit = 100 - -""" - -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import logging -import os -import time -import unittest - -from rcsb.db.crate.Connection import Connection -from rcsb.db.crate.CrateDbLoader import CrateDbLoader -from rcsb.db.crate.CrateDbUtil import CrateDbQuery -from rcsb.db.sql.SqlGen import SqlGenAdmin -from rcsb.utils.repository.RepositoryProvider import RepositoryProvider -from rcsb.db.utils.SchemaProvider import SchemaProvider -from rcsb.utils.config.ConfigUtil import ConfigUtil - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - -try: - from mmcif.io.IoAdapterCore import IoAdapterCore as IoAdapter -except Exception: - from mmcif.io.IoAdapterPy import IoAdapterPy as IoAdapter - - -logging.basicConfig(level=logging.DEBUG, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - -HERE = os.path.abspath(os.path.dirname(__file__)) -TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) - - -class SchemaDefLoaderCrateDbTests(unittest.TestCase): - def __init__(self, methodName="runTest"): - super(SchemaDefLoaderCrateDbTests, self).__init__(methodName) - self.__verbose = True - self.__debug = False - - def setUp(self): - self.__verbose = True - self.__numProc = 2 - self.__fileLimit = 
100 - self.__workPath = os.path.join(HERE, "test-output") - self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") - configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml") - configName = "site_info_configuration" - self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName) - self.__resourceName = "CRATE_DB" - self.__schP = SchemaProvider(self.__cfgOb, self.__workPath, useCache=True) - self.__rpP = RepositoryProvider(cfgOb=self.__cfgOb, numProc=self.__numProc, fileLimit=self.__fileLimit, cachePath=self.__workPath) - # - self.__tableIdSkipD = {"ATOM_SITE": True, "ATOM_SITE_ANISOTROP": True, "__LOAD_STATUS__": True} - self.__ioObj = IoAdapter(verbose=self.__verbose) - # - self.__startTime = time.time() - logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - - def tearDown(self): - endTime = time.time() - logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime) - - def testConnection(self): - try: - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - self.assertNotEqual(client, None) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testSchemaCreate(self): - """Create table schema (live) for BIRD, chemical component, and PDBx data.""" - try: - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="bird") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="chem_comp") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="pdbx") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testSchemaRemove(self): - """Remove table schema 
(live) for BIRD, chemical component, and PDBx data.""" - try: - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="bird") - ret = self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="chem_comp") - ret = self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="pdbx") - ret = self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertBirdReference(self): - - try: - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="bird") - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList(contentType="bird") - inputPathList.extend(self.__rpP.getLocatorObjList(contentType="bird_family")) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CrateDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="crate-insert", deleteOpt="selected") - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertManyBirdReference(self): - try: - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="bird") - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList(contentType="bird") - inputPathList.extend(self.__rpP.getLocatorObjList(contentType="bird_family")) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CrateDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="crate-insert-many", deleteOpt="selected") - self.assertEqual(ret, True) 
- except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertChemCompReference(self): - - try: - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="chem_comp") - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList(contentType="chem_comp") - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CrateDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="crate-insert", deleteOpt="selected") - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertManyChemCompReference(self): - - try: - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="chem_comp") - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList(contentType="chem_comp") - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CrateDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="crate-insert-many", deleteOpt="selected") - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertPdbxExampleFiles(self): - try: - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="pdbx") - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList(contentType="pdbx") - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CrateDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="crate-insert", 
deleteOpt="selected", tableIdSkipD=self.__tableIdSkipD) - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadInsertManyPdbxExampleFiles(self): - try: - sd, _, _, _ = self.__schP.getSchemaInfo(databaseName="pdbx") - self.__schemaCreate(schemaDefObj=sd) - inputPathList = self.__rpP.getLocatorObjList(contentType="pdbx") - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CrateDbLoader(schemaDefObj=sd, ioObj=self.__ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=inputPathList, loadType="crate-insert-many", deleteOpt="selected", tableIdSkipD=self.__tableIdSkipD) - self.assertEqual(ret, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def __schemaCreate(self, schemaDefObj): - """Test case - create table schema using schema definition""" - ret = 0 - try: - tableIdList = schemaDefObj.getTableIdList() - sqlGen = SqlGenAdmin(self.__verbose, serverType="cratedb") - sqlL = [] - for tableId in tableIdList: - if tableId in self.__tableIdSkipD: - continue - tableDefObj = schemaDefObj.getTable(tableId) - sqlL.extend(sqlGen.createTableSQL(databaseName=schemaDefObj.getVersionedDatabaseName(), tableDefObj=tableDefObj)) - - logger.debug("\nSchema creation SQL string\n %s\n\n", "\n".join(sqlL)) - logger.info("Creating schema using database %s", schemaDefObj.getVersionedDatabaseName()) - # - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - crQ = CrateDbQuery(dbcon=client, verbose=self.__verbose) - ret = crQ.sqlCommandList(sqlCommandList=sqlL) - logger.debug("Schema create command returns %r\n", ret) - return ret - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def __schemaRemove(self, schemaDefObj): - """Test case - remove table schema using schema definition""" 
- ret = 0 - try: - tableIdList = schemaDefObj.getTableIdList() - sqlGen = SqlGenAdmin(self.__verbose, serverType="cratedb") - sqlL = [] - for tableId in tableIdList: - if tableId in self.__tableIdSkipD: - continue - tableDefObj = schemaDefObj.getTable(tableId) - sqlL.extend(sqlGen.dropTableSQL(databaseName=schemaDefObj.getVersionedDatabaseName(), tableDefObj=tableDefObj)) - sqlL.extend(sqlGen.dropTableSQL(databaseName=schemaDefObj.getDatabaseName(), tableDefObj=tableDefObj)) - - logger.debug("Schema Remove SQL string\n %s", "\n".join(sqlL)) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - crQ = CrateDbQuery(dbcon=client, verbose=self.__verbose) - ret = crQ.sqlCommandList(sqlCommandList=sqlL) - logger.debug("Schema remove command returns %r\n", ret) - return ret - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - -def createConnectionSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoaderCrateDbTests("testConnection")) - return suiteSelect - - -def createSchemaSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoaderCrateDbTests("testSchemaCreate")) - suiteSelect.addTest(SchemaDefLoaderCrateDbTests("testSchemaRemove")) - return suiteSelect - - -def loadBirdReferenceSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoaderCrateDbTests("testLoadInsertBirdReference")) - suiteSelect.addTest(SchemaDefLoaderCrateDbTests("testLoadInsertManyBirdReference")) - return suiteSelect - - -def loadCCReferenceSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoaderCrateDbTests("testLoadInsertChemCompReference")) - suiteSelect.addTest(SchemaDefLoaderCrateDbTests("testLoadInsertManyChemCompReference")) - return suiteSelect - - -def loadPdbxSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoaderCrateDbTests("testLoadInsertPdbxExampleFiles")) - 
suiteSelect.addTest(SchemaDefLoaderCrateDbTests("testLoadInsertManyPdbxExampleFiles")) - return suiteSelect - - -if __name__ == "__main__": - # - mySuite = createConnectionSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - - mySuite = createSchemaSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - # - - mySuite = loadBirdReferenceSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - # - - mySuite = loadCCReferenceSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - # - - mySuite = loadPdbxSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) -# diff --git a/rcsb/db/tests-crate/testSchemaDefLoaderCrateDbMulti.py b/rcsb/db/tests-crate/testSchemaDefLoaderCrateDbMulti.py deleted file mode 100644 index 0ff840dc..00000000 --- a/rcsb/db/tests-crate/testSchemaDefLoaderCrateDbMulti.py +++ /dev/null @@ -1,291 +0,0 @@ -## -# File: SchemaDefLoaderCrateDbMultiTests.py -# Author: J. Westbrook -# Date: 10-Feb-2018 -# Version: 0.001 -# -# Updates: -# 2-Apr-2018 jdw update for refactored api's an utils -# -## -""" -Tests for creating and loading distributed rdbms database using PDBx/mmCIF data files -and external schema definitions using crateDb services - Covers BIRD, CCD and PDBx/mmCIF -model files - Multiprocessor mode tests - -The following test settings from the configuration file be used will a fallback to localhost/26257. 
- - CRATE_DB_USER_NAME - [CRATE_DB_PW] - CRATE_DB_NAME - CRATE_DB_HOST - -See also the load length limit for each file type for testing - Set to None to remove - - - self.__fileLimit = 100 - -""" - -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import logging -import os -import time -import unittest - -from rcsb.db.crate.Connection import Connection -from rcsb.db.crate.CrateDbLoader import CrateDbLoader -from rcsb.db.crate.CrateDbUtil import CrateDbQuery - -# -from rcsb.db.sql.SqlGen import SqlGenAdmin -from rcsb.utils.repository.RepositoryProvider import RepositoryProvider -from rcsb.db.utils.SchemaProvider import SchemaProvider -from rcsb.utils.config.ConfigUtil import ConfigUtil -from rcsb.utils.multiproc.MultiProcUtil import MultiProcUtil - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - -try: - from mmcif.io.IoAdapterCore import IoAdapterCore as IoAdapter -except Exception: - from mmcif.io.IoAdapterPy import IoAdapterPy as IoAdapter - - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - -HERE = os.path.abspath(os.path.dirname(__file__)) -TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) - - -class SchemaDefLoadercrateDbMultiTests(unittest.TestCase): - def __init__(self, methodName="runTest"): - super(SchemaDefLoadercrateDbMultiTests, self).__init__(methodName) - self.__verbose = True - self.__createFlag = True - - def setUp(self): - self.__verbose = True - self.__numProc = 2 - self.__fileLimit = 100 - self.__chunkSize = 0 - self.__workPath = os.path.join(HERE, "test-output") - self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") - configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml") - configName = 
"site_info_configuration" - self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName) - self.__resourceName = "CRATE_DB" - self.__schP = SchemaProvider(self.__cfgOb, self.__workPath, useCache=True) - self.__rpP = RepositoryProvider(cfgOb=self.__cfgOb, numProc=self.__numProc, fileLimit=self.__fileLimit, cachePath=self.__workPath) - # - # - self.__tableIdSkipD = {"ATOM_SITE": True, "ATOM_SITE_ANISOTROP": True, "__LOAD_STATUS__": True} - self.__ioObj = IoAdapter(verbose=self.__verbose) - # - self.__startTime = time.time() - logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - - def tearDown(self): - endTime = time.time() - logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime) - - def testConnection(self): - try: - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - self.assertNotEqual(client, None) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testSchemaCreate(self): - """Create table schema (live) for BIRD, chemical component, and PDBx data.""" - try: - sd, _, _, _ = self.__schP.getSchemaInfo("bird") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("chem_comp") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("pdbx") - ret = self.__schemaCreate(schemaDefObj=sd) - self.assertEqual(ret, True) - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testSchemaRemove(self): - """Remove table schema (live) for BIRD, chemical component, and PDBx data.""" - try: - sd, _, _, _ = self.__schP.getSchemaInfo("bird") - ret = self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("chem_comp") - ret = 
self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - sd, _, _, _ = self.__schP.getSchemaInfo("pdbx") - ret = self.__schemaRemove(schemaDefObj=sd) - self.assertEqual(ret, True) - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadChemCompMulti(self): - self.__testLoadFilesMulti("chem_comp") - - def testLoadBirdMulti(self): - self.__testLoadFilesMulti("bird") - - def testLoadPdbxMulti(self): - self.__testLoadFilesMulti("pdbx") - - def __getPathList(self, fType): - pathList = [] - if fType == "chem_comp": - pathList = self.__rpP.getLocatorObjList("chem_comp") - elif fType == "bird": - pathList = self.__rpP.getLocatorObjList("bird") - pathList.extend(self.__rpP.getLocatorObjList("bird_family")) - elif fType == "pdbx": - pathList = self.__rpP.getLocatorObjList("pdbx") - return pathList - - def loadInsertMany(self, dataList, procName, optionsD, workingDir): - - try: - _ = workingDir - ret = None - sd = optionsD["sd"] - skipD = optionsD["skip"] - ioObj = IoAdapter(verbose=self.__verbose) - logger.debug("%s pathlist %r", procName, dataList) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = CrateDbLoader(schemaDefObj=sd, ioObj=ioObj, dbCon=client, workPath=self.__workPath, cleanUp=False, warnings="default", verbose=self.__verbose) - ret = sdl.load(inputPathList=dataList, loadType="crate-insert-many", deleteOpt="selected", tableIdSkipD=skipD) - # all or nothing here - if ret: - return dataList, dataList, [] - else: - return [], [], [] - except Exception as e: - logger.info("Failing with dataList %r", dataList) - logger.exception("Failing with %s", str(e)) - - return [], [], [] - - def __testLoadFilesMulti(self, contentType): - """Test case - create load w/insert-many all chemical component definition data files - (multiproc test)""" - numProc = self.__numProc - chunkSize = self.__chunkSize - try: - # - sd, _, _, _ = self.__schP.getSchemaInfo(contentType) - if 
self.__createFlag: - self.__schemaCreate(schemaDefObj=sd) - - optD = {} - optD["sd"] = sd - if contentType == "pdbx": - optD["skip"] = self.__tableIdSkipD - else: - optD["skip"] = {} - - # - pathList = self.__getPathList(fType=contentType) - logger.debug("Input path list %r", pathList) - mpu = MultiProcUtil(verbose=True) - mpu.setOptions(optionsD=optD) - mpu.set(workerObj=self, workerMethod="loadInsertMany") - ok, _, _, _ = mpu.runMulti(dataList=pathList, numProc=numProc, numResults=1, chunkSize=chunkSize) - self.assertEqual(ok, True) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def __schemaCreate(self, schemaDefObj): - """Test case - create table schema using schema definition""" - ret = 0 - try: - tableIdList = schemaDefObj.getTableIdList() - sqlGen = SqlGenAdmin(self.__verbose, serverType="cratedb") - sqlL = [] - for tableId in tableIdList: - if tableId in self.__tableIdSkipD: - continue - tableDefObj = schemaDefObj.getTable(tableId) - sqlL.extend(sqlGen.createTableSQL(databaseName=schemaDefObj.getVersionedDatabaseName(), tableDefObj=tableDefObj)) - - logger.debug("Schema creation SQL string\n %s\n\n", "\n".join(sqlL)) - logger.info("Creating schema using database %s", schemaDefObj.getVersionedDatabaseName()) - # - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - crQ = CrateDbQuery(dbcon=client, verbose=self.__verbose) - ret = crQ.sqlCommandList(sqlCommandList=sqlL) - logger.debug("Schema create command returns %r\n", ret) - return ret - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def __schemaRemove(self, schemaDefObj): - """Test case - remove table schema using schema definition""" - ret = 0 - try: - tableIdList = schemaDefObj.getTableIdList() - sqlGen = SqlGenAdmin(self.__verbose, serverType="cratedb") - sqlL = [] - for tableId in tableIdList: - if tableId in self.__tableIdSkipD: - continue - tableDefObj = schemaDefObj.getTable(tableId) 
- sqlL.extend(sqlGen.dropTableSQL(databaseName=schemaDefObj.getVersionedDatabaseName(), tableDefObj=tableDefObj)) - sqlL.extend(sqlGen.dropTableSQL(databaseName=schemaDefObj.getDatabaseName(), tableDefObj=tableDefObj)) - - logger.debug("Schema Remove SQL string\n %s", "\n".join(sqlL)) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - crQ = CrateDbQuery(dbcon=client, verbose=self.__verbose) - ret = crQ.sqlCommandList(sqlCommandList=sqlL) - logger.debug("Schema remove command returns %r\n", ret) - return ret - # - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - -def baseSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoadercrateDbMultiTests("testConnection")) - suiteSelect.addTest(SchemaDefLoadercrateDbMultiTests("testSchemaCreate")) - suiteSelect.addTest(SchemaDefLoadercrateDbMultiTests("testSchemaRemove")) - return suiteSelect - - -def loadSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoadercrateDbMultiTests("testConnection")) - suiteSelect.addTest(SchemaDefLoadercrateDbMultiTests("testLoadChemCompMulti")) - suiteSelect.addTest(SchemaDefLoadercrateDbMultiTests("testLoadBirdMulti")) - suiteSelect.addTest(SchemaDefLoadercrateDbMultiTests("testLoadPdbxMulti")) - return suiteSelect - - -if __name__ == "__main__": - - mySuite = baseSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - - mySuite = loadSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) diff --git a/rcsb/db/tests-mysql/__init__.py b/rcsb/db/tests-mysql/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/rcsb/db/tests-mysql/test-output/.gitkeep b/rcsb/db/tests-mysql/test-output/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/rcsb/db/tests-mysql/testConnection.py b/rcsb/db/tests-mysql/testConnection.py deleted file mode 100644 index 8c6768a1..00000000 --- a/rcsb/db/tests-mysql/testConnection.py +++ /dev/null @@ 
-1,181 +0,0 @@ -## -# -# File: ConnectionTests.py -# Author: J. Westbrook -# Date: 26-Mar-2018 -# Version: 0.001 -# -# Updates: -# 30-Mar-2018 jdw add tests for context manager style opens -# 25-Oct-2018 jdw add section name to connnection resource assignment method -## -""" -Test cases opening database connections. - -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import logging -import os -import sys -import time -import unittest - -from rcsb.db.mysql.Connection import Connection -from rcsb.db.mysql.MyDbUtil import MyDbQuery -from rcsb.utils.config.ConfigUtil import ConfigUtil - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - -HERE = os.path.abspath(os.path.dirname(__file__)) -TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) - - -class ConnectionTests(unittest.TestCase): - def setUp(self): - self.__dbName = "test_database" - self.__lfh = sys.stderr - self.__verbose = True - self.__myC = None - - configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml") - configName = "site_info_configuration" - self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName) - self.__resourceName = "MYSQL_DB" - self.__connectD = self.__assignResource(self.__cfgOb, resourceName=self.__resourceName, sectionName=configName) - - self.__startTime = time.time() - logger.debug("Starting at %s", time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - - def tearDown(self): - endTime = time.time() - logger.debug("Completed at %s (%.4f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime) - - def __assignResource(self, cfgOb, sectionName="site_info_configuration", resourceName="MYSQL_DB"): - cn = Connection(cfgOb=cfgOb) - return cn.assignResource(resourceName=resourceName, sectionName=sectionName) - - 
def __open(self, connectD): - cObj = Connection() - cObj.setPreferences(connectD) - ok = cObj.openConnection() - if ok: - return cObj - else: - return None - - def __close(self, cObj): - if cObj is not None: - cObj.closeConnection() - return True - else: - return False - - def __getClientConnection(self, cObj): - return cObj.getClientConnection() - - def testCreateConnectionContext(self): - """Test case - connection creation using context manager""" - try: - with Connection(cfgOb=self.__cfgOb, resourceName="MYSQL_DB") as client: - self.assertNotEqual(client, None) - - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testCreateConnection(self): - """Test case - connection creation""" - try: - cObj = self.__open(self.__connectD) - client = self.__getClientConnection(cObj) - self.assertNotEqual(client, None) - ok = self.__close(cObj) - self.assertTrue(ok) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testCreateMultipleConnections(self): - """Test case - multiple connection creation""" - try: - for _ in range(100): - cObj = self.__open(self.__connectD) - client = self.__getClientConnection(cObj) - self.assertNotEqual(client, None) - ok = self.__close(cObj) - self.assertTrue(ok) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testCreateMultipleConnectionsContext(self): - """Test case - multiple connection creation""" - try: - for _ in range(100): - with Connection(cfgOb=self.__cfgOb, resourceName="MYSQL_DB") as client: - self.assertNotEqual(client, None) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testCreateMultipleConnectionsWithQuery(self): - """Test case - multiple connection creation""" - try: - for ii in range(100): - cObj = self.__open(self.__connectD) - client = self.__getClientConnection(cObj) - self.assertNotEqual(client, None) - for jj in range(100): - my = MyDbQuery(dbcon=client) 
- ok = my.testSelectQuery(count=ii + jj) - self.assertTrue(ok) - ok = self.__close(cObj) - self.assertTrue(ok) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testCreateMultipleConnWithQueryContext(self): - """Test case - multiple connection creation""" - try: - for ii in range(100): - with Connection(cfgOb=self.__cfgOb, resourceName="MYSQL_DB") as client: - self.assertNotEqual(client, None) - for jj in range(100): - my = MyDbQuery(dbcon=client) - ok = my.testSelectQuery(count=ii + jj) - self.assertTrue(ok) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - -def suiteOpen(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(ConnectionTests("testCreateConnection")) - suiteSelect.addTest(ConnectionTests("testCreateMultipleConnections")) - suiteSelect.addTest(ConnectionTests("testCreateMultipleConnectionsWithQuery")) - return suiteSelect - - -def suiteOpenContext(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(ConnectionTests("testCreateConnectionContext")) - suiteSelect.addTest(ConnectionTests("testCreateMultipleConnectionsContext")) - suiteSelect.addTest(ConnectionTests("testCreateMultipleConnWithQueryContext")) - return suiteSelect - - -if __name__ == "__main__": - - mySuite = suiteOpen() - unittest.TextTestRunner(verbosity=2).run(mySuite) - - mySuite = suiteOpenContext() - unittest.TextTestRunner(verbosity=2).run(mySuite) diff --git a/rcsb/db/tests-mysql/testSchemaDefLoaderDb.py b/rcsb/db/tests-mysql/testSchemaDefLoaderDb.py deleted file mode 100644 index 538b52b0..00000000 --- a/rcsb/db/tests-mysql/testSchemaDefLoaderDb.py +++ /dev/null @@ -1,257 +0,0 @@ -## -# File: SchemaDefLoaderDbTests.py -# Author: J. 
Westbrook -# Date: 29-Mar-2018 -# Version: 0.001 -# -# Updates: -# 20-Jun-2018 jdw updates for new schema generation and data preparation tools -# -## -""" -Tests for creating and loading rdbms database (mysql) using PDBx/mmCIF data files -and external schema definition. - -""" - -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - - -import logging -import os -import platform -import time -import unittest - -from rcsb.db.define.SchemaDefAccess import SchemaDefAccess -from rcsb.db.mysql.Connection import Connection -from rcsb.db.mysql.MyDbUtil import MyDbQuery -from rcsb.db.mysql.SchemaDefLoader import SchemaDefLoader -from rcsb.db.sql.SqlGen import SqlGenAdmin -from rcsb.utils.repository.RepositoryProvider import RepositoryProvider -from rcsb.db.utils.SchemaProvider import SchemaProvider -from rcsb.utils.config.ConfigUtil import ConfigUtil - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - -HERE = os.path.abspath(os.path.dirname(__file__)) -TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) - - -class SchemaDefLoaderDbTests(unittest.TestCase): - def __init__(self, methodName="runTest"): - super(SchemaDefLoaderDbTests, self).__init__(methodName) - self.__verbose = True - - def setUp(self): - self.__isMac = platform.system() == "Darwin" - self.__excludeTypeL = None if self.__isMac else ["optional"] - self.__verbose = True - # - fileLimit = 100 - numProc = 2 - self.__cachePath = os.path.join(TOPDIR, "CACHE") - self.__workPath = os.path.join(HERE, "test-output") - mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") - configPath = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml") - # - configName = "site_info_configuration" - self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath) - 
self.__resourceName = "MYSQL_DB" - # - self.__schP = SchemaProvider(self.__cfgOb, self.__cachePath, useCache=True) - self.__rpP = RepositoryProvider(cfgOb=self.__cfgOb, numProc=numProc, fileLimit=fileLimit, cachePath=self.__cachePath) - # - # - self.__startTime = time.time() - logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - - def tearDown(self): - endTime = time.time() - logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime) - - def __schemaCreate(self, schemaDefObj): - """Create table schema using schema definition""" - try: - tableIdList = schemaDefObj.getSchemaIdList() - sqlGen = SqlGenAdmin(self.__verbose) - sqlL = sqlGen.createDatabaseSQL(schemaDefObj.getDatabaseName()) - for tableId in tableIdList: - tableDefObj = schemaDefObj.getSchemaObject(tableId) - sqlL.extend(sqlGen.createTableSQL(databaseName=schemaDefObj.getDatabaseName(), tableDefObj=tableDefObj)) - - logger.debug("Schema creation SQL string\n %s\n\n", "\n".join(sqlL)) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - myQ = MyDbQuery(dbcon=client, verbose=self.__verbose) - # - # Permit warnings to support "drop table if exists" for missing tables. 
- # - myQ.setWarning("ignore") - ret = myQ.sqlCommand(sqlCommandList=sqlL) - logger.debug("\n\n+INFO mysql server returns %r\n", ret) - self.assertTrue(ret) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - # ------------- - ------------------------------------------------------------------------------------------- - def testSchemaCreate(self): - """Create table schema for BIRD, chemical component, and PDBx data.""" - cD = self.__schP.makeSchemaDef("bird", dataTyping="SQL", saveSchema=True) - sd = SchemaDefAccess(cD) - self.__schemaCreate(sd) - # - cD = self.__schP.makeSchemaDef("chem_comp", dataTyping="SQL", saveSchema=True) - sd = SchemaDefAccess(cD) - self.__schemaCreate(sd) - # - # cD = self.__schP.makeSchemaDef("pdbx", dataTyping="SQL", saveSchema=True) - # sd = SchemaDefAccess(cD) - self.__schemaCreate(sd) - - def testLoadBirdReference(self): - try: - cD = self.__schP.makeSchemaDef("bird", dataTyping="SQL", saveSchema=True) - sd = SchemaDefAccess(cD) - self.__schemaCreate(sd) - - inputPathList = self.__rpP.getLocatorObjList(contentType="bird") - inputPathList.extend(self.__rpP.getLocatorObjList(contentType="bird_family")) - # - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = SchemaDefLoader( - self.__cfgOb, - schemaDefObj=sd, - dbCon=client, - cachePath=self.__cachePath, - workPath=self.__workPath, - cleanUp=False, - warnings="error", - verbose=self.__verbose, - restoreUseStash=False, - restoreUseGit=True, - providerTypeExcludeL=self.__excludeTypeL, - ) - ok = sdl.load(inputPathList=inputPathList, loadType="batch-file") - self.assertTrue(ok) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testReLoadBirdReference(self): - try: - cD = self.__schP.makeSchemaDef("bird", dataTyping="SQL", saveSchema=True) - sd = SchemaDefAccess(cD) - self.__schemaCreate(sd) - - inputPathList = self.__rpP.getLocatorObjList(contentType="bird") - 
inputPathList.extend(self.__rpP.getLocatorObjList(contentType="bird_family")) - # - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = SchemaDefLoader( - self.__cfgOb, - schemaDefObj=sd, - dbCon=client, - cachePath=self.__cachePath, - workPath=self.__workPath, - cleanUp=False, - warnings="error", - verbose=self.__verbose, - restoreUseStash=False, - restoreUseGit=True, - providerTypeExcludeL=self.__excludeTypeL, - ) - sdl.load(inputPathList=inputPathList, loadType="batch-file") - # - logger.debug("INFO BATCH FILE RELOAD TEST --------------------------------------------\n") - ok = sdl.load(inputPathList=inputPathList, loadType="batch-file", deleteOpt="all") - self.assertTrue(ok) - # - logger.debug("\n\n\n+INFO BATCH INSERT RELOAD TEST --------------------------------------------\n") - ok = sdl.load(inputPathList=inputPathList, loadType="batch-file", deleteOpt="selected") - self.assertTrue(ok) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def testLoadChemCompReference(self): - try: - cD = self.__schP.makeSchemaDef("chem_comp", dataTyping="SQL", saveSchema=True) - sd = SchemaDefAccess(cD) - self.__schemaCreate(sd) - - inputPathList = self.__rpP.getLocatorObjList(contentType="chem_comp") - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = SchemaDefLoader( - self.__cfgOb, - schemaDefObj=sd, - dbCon=client, - cachePath=self.__cachePath, - workPath=self.__workPath, - cleanUp=False, - warnings="error", - verbose=self.__verbose, - restoreUseStash=False, - restoreUseGit=True, - providerTypeExcludeL=self.__excludeTypeL, - ) - ok = sdl.load(inputPathList=inputPathList, loadType="batch-file") - self.assertTrue(ok) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - @unittest.skip("Disable test - schema not optimized for mysql limitations") - def testLoadPdbxFiles(self): - try: - cD = self.__schP.makeSchemaDef("pdbx", 
dataTyping="SQL", saveSchema=True) - sd = SchemaDefAccess(cD) - self.__schemaCreate(sd) - - inputPathList = self.__rpP.getLocatorObjList(contentType="pdbx") - logger.debug("Input path list %r", inputPathList) - with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client: - sdl = SchemaDefLoader( - self.__cfgOb, - schemaDefObj=sd, - dbCon=client, - cachePath=self.__cachePath, - workPath=self.__workPath, - cleanUp=False, - warnings="error", - verbose=self.__verbose, - restoreUseStash=False, - restoreUseGit=True, - providerTypeExcludeL=self.__excludeTypeL, - ) - ok = sdl.load(inputPathList=inputPathList, loadType="batch-insert", deleteOpt="all") - self.assertTrue(ok) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - -def createSchemaSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoaderDbTests("testSchemaCreate")) - return suiteSelect - - -def loadReferenceSuite(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SchemaDefLoaderDbTests("testLoadBirdReference")) - suiteSelect.addTest(SchemaDefLoaderDbTests("testReLoadBirdReference")) - suiteSelect.addTest(SchemaDefLoaderDbTests("testLoadChemCompReference")) - # suiteSelect.addTest(SchemaDefLoaderDbTests("testLoadPdbxFiles")) - return suiteSelect - - -if __name__ == "__main__": - mySuite = createSchemaSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) - mySuite = loadReferenceSuite() - unittest.TextTestRunner(verbosity=2).run(mySuite) diff --git a/rcsb/db/tests/testLoggerStream.py b/rcsb/db/tests/testLoggerStream.py deleted file mode 100644 index ef301d6c..00000000 --- a/rcsb/db/tests/testLoggerStream.py +++ /dev/null @@ -1,28 +0,0 @@ -import logging -import unittest -from io import StringIO - - -class MyTest(unittest.TestCase): - def setUp(self): - self.stream = StringIO() - self.handler = logging.StreamHandler(self.stream) - self.log = logging.getLogger("mylogger") - self.log.setLevel(logging.INFO) - for handler 
in self.log.handlers: - self.log.removeHandler(handler) - self.log.addHandler(self.handler) - - def testLog(self): - self.log.info("test message") - self.handler.flush() - # print('[', self.stream.getvalue(), ']') - self.assertTrue(self.stream.getvalue(), "test message") - - def tearDown(self): - self.log.removeHandler(self.handler) - self.handler.close() - - -if __name__ == "__main__": - unittest.main() diff --git a/rcsb/db/tests/testSqlGen.py b/rcsb/db/tests/testSqlGen.py deleted file mode 100644 index 05e4b83d..00000000 --- a/rcsb/db/tests/testSqlGen.py +++ /dev/null @@ -1,158 +0,0 @@ -## -# File: SqlGenTests.py -# Author: J. Westbrook -# Date: 31-Jan-2012 -# Version: 0.001 -# -# Updates: 20-Dec-2017 jdw py2/py3 working in compat23 branch -# 12-Mar-2018 jdw refactor for Python Packaging - -# 6-Jul-2018 jdw Update for new schema def prototypes -## -""" -Test cases for SQL command generation -- no data connections required for these tests -- - -""" -__docformat__ = "restructuredtext en" -__author__ = "John Westbrook" -__email__ = "jwest@rcsb.rutgers.edu" -__license__ = "Apache 2.0" - -import logging -import os -import sys -import time -import unittest - -from rcsb.db.define.SchemaDefAccess import SchemaDefAccess -from rcsb.db.sql.SqlGen import SqlGenAdmin, SqlGenCondition, SqlGenQuery -from rcsb.db.utils.SchemaProvider import SchemaProvider -from rcsb.utils.config.ConfigUtil import ConfigUtil - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -HERE = os.path.abspath(os.path.dirname(__file__)) -TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) - - -class SqlGenTests(unittest.TestCase): - def setUp(self): - self.__verbose = True - # - mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") - pathConfig = os.path.join(TOPDIR, "rcsb", "db", "config", "exdb-config-example.yml") - self.__cachePath = os.path.join(TOPDIR, 
"CACHE") - # - configName = "site_info_configuration" - self.__cfgOb = ConfigUtil(configPath=pathConfig, defaultSectionName=configName, mockTopPath=mockTopPath) - self.__sdu = SchemaProvider(self.__cfgOb, self.__cachePath, useCache=True) - # - - self.__startTime = time.time() - logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime())) - - def testSQLMethods(self): - schemaNames = ["pdbx_core"] - dataTyping = "SQL" - for schemaName in schemaNames: - dD = self.__sdu.makeSchemaDef(schemaName, dataTyping=dataTyping, saveSchema=False) - sD = SchemaDefAccess(dD) - self.__testSchemaCreate(sD) - self.__testImportExport(sD) - self.__testSelectionAndConditions(sD) - - # - - def __getHelper(self, modulePath, **kwargs): - aMod = __import__(modulePath, globals(), locals(), [""]) - sys.modules[modulePath] = aMod - # - # Strip off any leading path to the module before we instaniate the object. - mpL = modulePath.split(".") - moduleName = mpL[-1] - # - aObj = getattr(aMod, moduleName)(**kwargs) - return aObj - - def __testSchemaCreate(self, sD): - """Test case - create table schema using input schema definition as an example""" - - try: - tableIdList = sD.getSchemaIdList() - myAd = SqlGenAdmin(self.__verbose) - sqlL = [] - for tableId in tableIdList: - tableDefObj = sD.getSchemaObject(tableId) - sqlL.extend(myAd.createTableSQL(databaseName=sD.getDatabaseName(), tableDefObj=tableDefObj)) - logger.debug("\n\n+SqlGenTests table creation SQL string\n %s\n\n", "\n".join(sqlL)) - self.assertGreaterEqual(len(sqlL), 10) - - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def __testImportExport(self, sD): - """Test case - import and export commands --""" - - try: - databaseName = sD.getDatabaseName() - tableIdList = sD.getSchemaIdList() - myAd = SqlGenAdmin(self.__verbose) - for tableId in tableIdList: - tableDefObj = sD.getSchemaObject(tableId) - exportPath = os.path.join(HERE, "test-output", 
tableDefObj.getName() + ".tdd") - sqlExport = myAd.exportTable(databaseName, tableDefObj, exportPath=exportPath) - logger.debug("\n\n+SqlGenTests table export SQL string\n %s\n\n", sqlExport) - sqlImport = myAd.importTable(databaseName, tableDefObj, importPath=exportPath) - logger.debug("\n\n+SqlGenTests table import SQL string\n %s\n\n", sqlImport) - self.assertGreaterEqual(len(sqlImport), 100) - - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - def __testSelectionAndConditions(self, sD): - """Test case - selection everything for a simple condition-""" - try: - # get delete attribute - - # - tableIdList = sD.getSchemaIdList() - logger.debug("TableIdList %r", tableIdList) - sqlGen = SqlGenQuery(schemaDefObj=sD, verbose=self.__verbose) - - for tableId in tableIdList: - tableDefObj = sD.getSchemaObject(tableId) - dAtId = tableDefObj.getDeleteAttributeId() - - if dAtId: - sqlCondition = SqlGenCondition(schemaDefObj=sD, verbose=self.__verbose) - sqlCondition.addValueCondition((tableId, dAtId), "EQ", ("D000001", "CHAR")) - aIdList = sD.getAttributeIdList(tableId) - for aId in aIdList: - sqlGen.addSelectAttributeId(attributeTuple=(tableId, aId)) - sqlGen.setCondition(sqlCondition) - sqlGen.addOrderByAttributeId(attributeTuple=(tableId, dAtId)) - sqlS = sqlGen.getSql() - logger.debug("\n\n+SqlGenTests table creation SQL string\n %s\n\n", sqlS) - self.assertGreaterEqual(len(sqlS), 50) - sqlGen.clear() - else: - logger.debug("Missing delete atttribe for table %r", tableId) - except Exception as e: - logger.exception("Failing with %s", str(e)) - self.fail() - - -def suiteSQLMethods(): - suiteSelect = unittest.TestSuite() - suiteSelect.addTest(SqlGenTests("testSQLMethods")) - return suiteSelect - - -if __name__ == "__main__": - # Run all tests -- - # unittest.main() - # - mySuite = suiteSQLMethods() - unittest.TextTestRunner(verbosity=2).run(mySuite) diff --git a/rcsb/db/utils/SchemaProvider.py b/rcsb/db/utils/SchemaProvider.py index 
85b47a3c..66579a3b 100644 --- a/rcsb/db/utils/SchemaProvider.py +++ b/rcsb/db/utils/SchemaProvider.py @@ -24,8 +24,6 @@ import os import pprint -# from jsondiff import diff - from rcsb.db.define.SchemaDefAccess import SchemaDefAccess from rcsb.db.define.SchemaDefBuild import SchemaDefBuild from rcsb.utils.io.FileUtil import FileUtil diff --git a/rcsb/db/utils/makePathList.py b/rcsb/db/utils/makePathList.py deleted file mode 100644 index 1e47bc6d..00000000 --- a/rcsb/db/utils/makePathList.py +++ /dev/null @@ -1,49 +0,0 @@ -## -# File makePathList.py -# Date: 18-Feb-2018 -# -# Get the current list of release entry files in our data release file system. -# -## -import logging -import os - -try: - import os.scandir as scandir -except Exception: - import scandir - -logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") -logger = logging.getLogger() - - -def makePdbxPathList(fp, cachePath=".", skipFile=None): - """Return the list of pdbx file paths in the current repository.""" - - try: - skipD = {} - if skipFile and os.access(skipFile, "r"): - with open(skipFile, "r", encoding="utf-8") as ifh: - for line in ifh: - idcode = str(line[:-1]).strip().lower() + ".cif" - skipD[idcode] = idcode - logger.info("Skip list length %d", len(skipD)) - # - with open(fp, "w", encoding="utf-8") as ofh: - for root, _, files in scandir.walk(cachePath, topdown=False): - if "REMOVE" in root: - continue - for name in files: - if name.endswith(".cif") and len(name) == 8 and name not in skipD: - ofh.write("%s\n" % os.path.join(root, name)) - # - # logger.info("\nFound %d files in %s\n" % (len(pathList), cachePath)) - return True - except Exception as e: - logger.exception("Failing with %s", str(e)) - - return False - - -if __name__ == "__main__": - ok = makePdbxPathList("PDBXPATHLIST.txt", cachePath="/net/beta_data/mmcif-pdbx-load-v5.0", skipFile="./DONE.LIST") diff --git a/rcsb/db/utils/unescape.py b/rcsb/db/utils/unescape.py deleted 
file mode 100644 index 40c3eecf..00000000 --- a/rcsb/db/utils/unescape.py +++ /dev/null @@ -1,35 +0,0 @@ -# -# from six.moves.html_parser import HTMLParser - -try: - # Python 2.6-2.7 - from HTMLParser import unescape -except ImportError: - # Python 3.6+ - from html import unescape - - -def unescapeXmlCharRef(iStr): - """ - Convert html character entities into unicode. - """ - oStr = unescape(iStr) - - return oStr - - -if __name__ == "__main__": - print("START") - print("%r" % unescapeXmlCharRef("<b>")) - print("%r" % unescapeXmlCharRef("Here is a "").encode("utf-8")) - print("%r" % unescapeXmlCharRef("Here is a Φ").encode("utf-8")) - print("%range" % unescapeXmlCharRef("Here is a Ψ")) - print("%r" % unescapeXmlCharRef("Here is a α")) - print("%r" % unescapeXmlCharRef("Here is a £")) - - print("%r" % unescapeXmlCharRef("Here is a ℅")) - print("%r" % unescapeXmlCharRef("Here is a ☆")) - print("%r" % unescapeXmlCharRef("Here is a 𝕫")) - for ichar in range(1, 8000): - myStr = "decimal %6d char &#%d;" % (ichar, ichar) - print("%r" % unescapeXmlCharRef(myStr)) diff --git a/requirements.txt b/requirements.txt index 66f314e3..44b0069b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,23 +1,10 @@ -future -six python-dateutil pytz jsonschema >= 2.6.0 -jsondiff >= 1.2.0 -strict-rfc3339 -mysqlclient >= 1.3.12 pymongo >= 4.8.0 mmcif >= 0.91.0 rcsb.utils.config >= 0.40 rcsb.utils.io >= 1.46 rcsb.utils.multiproc >= 0.19 -rcsb.utils.chemref >= 0.91 -scandir; python_version < "3.0" -configparser; python_version < "3.0" rcsb.utils.dictionary >= 1.29 rcsb.utils.repository >= 0.48 -# -SQLAlchemy == 1.4.46; sys_platform == 'linux' -# For crate and cockroach - -crate -psycopg2-binary diff --git a/setup.py b/setup.py index 6eec4736..2cec467a 100755 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ # # These basic tests require no database services - test_suite="rcsb.db.tests", - tests_require=["tox", "jsonschema", "strict-rfc3339"], + tests_require=["tox", "jsonschema", 
"rcsb.utils.chemref >= 0.91", "jsondiff >= 1.2.0"], # # Not configured ... extras_require={"dev": ["check-manifest"], "test": ["coverage"]}, diff --git a/tox.ini b/tox.ini index 277d050d..cf671eed 100644 --- a/tox.ini +++ b/tox.ini @@ -16,15 +16,14 @@ fixture_path_1 = "rcsb/db/tests" test_path_1 = "rcsb/db/tests" test_path_2 = "rcsb/db/tests-mongo" test_path_3 = "rcsb/db/cli" -test_path_4 = "rcsb/db/cli" +test_path_4 = "rcsb/db/cli" # TODO: is this a duplicate? # These are placeholders valid source directories without tests files #test_path_2 = "rcsb/db/tests-validate" #test_path_3 = "rcsb/db/tests-mongo" -#test_path_4 = "rcsb/db/tests-mysql" # # Comma separate list of directories for which test coverage will be evaluated coverage_source_paths = "rcsb/db" -coverage_exclude_paths = "rcsb/db/cli/*,rcsb/db/cockroach/*,rcsb/db/crate/*,rcsb/db/tests-crate/*,rcsb/db/tests-cockroach/*" +coverage_exclude_paths = "rcsb/db/cli/*" coverage_cutoff = 65 # ## -------------------------------------------------------------------------- @@ -122,4 +121,4 @@ commands = # isort -rc rcsb/utils --check-only echo "Completed {envname}" -# \ No newline at end of file +#