From 4945dd711d8b4c32f5d90ac64b3d4bfbd772a560 Mon Sep 17 00:00:00 2001 From: jwest Date: Mon, 15 Feb 2021 10:24:36 -0500 Subject: [PATCH] V0.11 Created module with content migrated from rcsb.db --- .gitignore | 5 +- .gitmodules | 3 + HISTORY.txt | 3 + MANIFEST.in | 5 + README.md | 45 +- azure-pipelines.yml | 45 + azure-template-publish-job.yml | 61 + azure-template-tox-job.yml | 202 + pylintrc | 588 +++ rcsb/__init__.py | 1 + rcsb/mock-data | 1 + rcsb/utils/__init__.py | 1 + .../dictionary/DictMethodAssemblyHelper.py | 746 +++ .../dictionary/DictMethodChemRefHelper.py | 937 ++++ .../utils/dictionary/DictMethodCommonUtils.py | 3994 +++++++++++++++++ .../dictionary/DictMethodEntityHelper.py | 1960 ++++++++ .../DictMethodEntityInstanceHelper.py | 1776 ++++++++ .../utils/dictionary/DictMethodEntryHelper.py | 1214 +++++ .../dictionary/DictMethodResourceProvider.py | 393 ++ rcsb/utils/dictionary/DictMethodRunner.py | 227 + .../utils/dictionary/DictionaryApiProvider.py | 113 + .../DictionaryApiProviderWrapper.py | 74 + rcsb/utils/dictionary/__init__.py | 5 + rcsb/utils/tests-dictionary/__init__.py | 0 .../tests-dictionary/testDictMethodRunner.py | 129 + .../testDictionaryApiProvider.py | 87 + .../testDictionaryApiProviderWrapper.py | 106 + requirements.txt | 11 + setup.cfg | 7 + setup.py | 71 + tox.ini | 170 + 31 files changed, 12977 insertions(+), 3 deletions(-) create mode 100644 .gitmodules create mode 100644 HISTORY.txt create mode 100644 MANIFEST.in create mode 100644 azure-pipelines.yml create mode 100644 azure-template-publish-job.yml create mode 100644 azure-template-tox-job.yml create mode 100644 pylintrc create mode 100644 rcsb/__init__.py create mode 160000 rcsb/mock-data create mode 100644 rcsb/utils/__init__.py create mode 100644 rcsb/utils/dictionary/DictMethodAssemblyHelper.py create mode 100644 rcsb/utils/dictionary/DictMethodChemRefHelper.py create mode 100644 rcsb/utils/dictionary/DictMethodCommonUtils.py create mode 100644 rcsb/utils/dictionary/DictMethodEntityHelper.py create mode 100644 rcsb/utils/dictionary/DictMethodEntityInstanceHelper.py create mode 100644 rcsb/utils/dictionary/DictMethodEntryHelper.py create mode 100644 rcsb/utils/dictionary/DictMethodResourceProvider.py create mode 100644 rcsb/utils/dictionary/DictMethodRunner.py create mode 100644 rcsb/utils/dictionary/DictionaryApiProvider.py create mode 100644 rcsb/utils/dictionary/DictionaryApiProviderWrapper.py create mode 100644 rcsb/utils/dictionary/__init__.py create mode 100644 rcsb/utils/tests-dictionary/__init__.py create mode 100644 rcsb/utils/tests-dictionary/testDictMethodRunner.py create mode 100644 rcsb/utils/tests-dictionary/testDictionaryApiProvider.py create mode 100644 rcsb/utils/tests-dictionary/testDictionaryApiProviderWrapper.py create mode 100644 requirements.txt create mode 100755 setup.cfg create mode 100755 setup.py create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index b6e4761..be2084b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,10 @@ __pycache__/ *.py[cod] *$py.class - +test-output/ +CACHE/ +LOGTOX +.tox/ # C extensions *.so diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..ef7431d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "rcsb/mock-data"] + path = rcsb/mock-data + url = https://github.com/rcsb/py-rcsb_mock-data.git diff --git a/HISTORY.txt b/HISTORY.txt new file mode 100644 index 0000000..db7c475 --- /dev/null +++ b/HISTORY.txt @@ -0,0 +1,3 @@ +# File: HISTORY.txt +# +14-Feb-2021 - V0.11 Created module with content migrated 
from rcsb.db
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..d36a05f
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,5 @@
+#
+# File: py-rcsb_utils_dictionary/MANIFEST.in
+#
+include HISTORY.txt
+#
diff --git a/README.md b/README.md
index 850eac0..fc1b546 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,43 @@
-# py-rcsb_utils_dictionary
-RCSB dictionary method utilities
+# RCSB Dictionary Method Utilities
+
+## A collection of Python Dictionary Method Utilities
+
+## Introduction
+
+This module contains a collection of utility classes for defining and managing the execution of
+dynamic dictionary methods.
+
+### Installation
+
+Install via [pip](https://pypi.python.org/pypi/pip).
+
+```bash
+pip install rcsb.utils.dictionary
+```
+
+Or, to install from source, clone the repository (including its submodules):
+
+```bash
+
+git clone --recurse-submodules https://github.com/rcsb/py-rcsb_utils_dictionary.git
+
+```
+
+Optionally, run the test suite (Python versions 3.7+) using
+[setuptools](https://setuptools.readthedocs.io/en/latest/) or
+[tox](http://tox.readthedocs.io/en/latest/example/platform.html):
+
+```bash
+python setup.py test
+
+# or simply run
+
+tox
+```
+
+To run tests from the source tree, the package must be installed in editable mode
+(i.e. with the -e option of [pip](https://pypi.python.org/pypi/pip)):
+
+```bash
+pip install -e .
+```
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
new file mode 100644
index 0000000..060822c
--- /dev/null
+++ b/azure-pipelines.yml
@@ -0,0 +1,45 @@
+# File: azure-pipelines.yml
+# Date: 4-Jul-2019 jdw Created pipeline
+#       8-Jul-2019 jdw add macos test/publish
+#       8-Jul-2019 jdw templatize
+#       9-Jul-2019 jdw add publish jobs
+#      21-Jul-2019 jdw update to Py38
+#      10-Mar-2020 jdw py38 only
+#      10-Dec-2020 jdw py39 only
+#
+name: $(BuildDefinitionName)_$(Date:yyyyMMdd)$(Rev:.rr)
+
+trigger:
+- master
+- utilsdict-*
+
+pr:
+- master
+
+schedules:
+- cron: "0 12 * * 0"
+  displayName: Weekly Sunday build
+  branches:
+    include:
+    - master
+  always: true
+
+jobs:
+  - template: azure-template-tox-job.yml
+    parameters: {tox: 'format_pep8', python: '3.9', os: 'linux'}
+  - template: azure-template-tox-job.yml
+    parameters: {tox: 'lint_pylint', python: '3.9', os: 'linux'}
+  - template: azure-template-tox-job.yml
+    parameters: {tox: 'test_coverage', python: '3.9', os: 'linux'}
+  #
+  - template: azure-template-tox-job.yml
+    parameters: {tox: 'py39', python: '3.9', os: 'linux'}
+  #
+  - template: azure-template-tox-job.yml
+    parameters: {tox: 'py39', python: '3.9', os: 'macos'}
+  #
+  #- template: azure-template-publish-job.yml
+  #  parameters: {tox: 'py39', python: '3.9', os: 'macos'}
+  - template: azure-template-publish-job.yml
+    parameters: {tox: 'py39', python: '3.9', os: 'linux'}
+  #
diff --git a/azure-template-publish-job.yml b/azure-template-publish-job.yml
new file mode 100644
index 0000000..e5cb74e
--- /dev/null
+++ b/azure-template-publish-job.yml
@@ -0,0 +1,61 @@
+# File: azure-template-publish-job.yml
+# Date: 8-Jul-2019 jdw split out from original pipeline
+#
+# Update:
+#   6-Aug-2019 jdw push source wheels (universal) for linux
+#
+##
+parameters:
+  tox: ""
+  python: ""
+  os: "linux"
+  fixtures: ""
+
+jobs:
+- job: ${{ format('publish_{0}_{1}', parameters.tox, parameters.os) }}
+  pool:
+    ${{ if eq(parameters.os, 'macos') }}:
+      vmImage: 'macOS-10.15'
+    ${{ if eq(parameters.os, 'linux') }}:
+      vmImage: 'ubuntu-20.04'
+  dependsOn:
+  - ${{ format('build_test_{0}_{1}',
parameters.tox, parameters.os) }} + condition: and(succeeded(), ne(variables['Build.Reason'], 'PullRequest'), eq(variables['Build.SourceBranch'], 'refs/heads/master')) + # + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: ${{ parameters.python }} + addToPath: true + displayName: setup python + # + #- checkout: self + # submodules: true + # + - download: current + artifact: ${{ format('sw_{0}_{1}', parameters.tox, parameters.os) }} + + - download: current + artifact: ${{ format('sw_u_{0}_{1}', parameters.tox, parameters.os) }} + # + - script: ls -lR $(Pipeline.Workspace)/${{ format('sw_{0}_{1}', parameters.tox, parameters.os) }} + displayName: "Listing of downloaded artifacts" + # + - script: python -m pip install --upgrade pip twine setuptools wheel + displayName: 'Install packaging tools' + # + - task: DownloadSecureFile@1 + name: pypicred + displayName: 'Download PyPI credentials' + inputs: + secureFile: 'PYPIRC-AZURE' + + - ${{ if startsWith(parameters.os, 'linux') }}: + - script: twine upload --verbose --skip-existing -r pypi --config-file $(pypicred.secureFilePath) $(Pipeline.Workspace)/${{ format('sw_u_{0}_{1}', parameters.tox, parameters.os) }}/* + displayName: "Linux upload sdist and source wheel to PyPi ..." + continueOnError: true + # + - ${{ if startsWith(parameters.os, 'macos') }}: + - script: twine upload --verbose --skip-existing -r pypi --config-file $(pypicred.secureFilePath) $(Pipeline.Workspace)/${{ format('sw_{0}_{1}', parameters.tox, parameters.os) }}/* + displayName: "Mac upload sdist and binary wheel to PyPi ..." + continueOnError: true \ No newline at end of file diff --git a/azure-template-tox-job.yml b/azure-template-tox-job.yml new file mode 100644 index 0000000..d8c1e1a --- /dev/null +++ b/azure-template-tox-job.yml @@ -0,0 +1,202 @@ +# File: azure-template-tox-job.yml +# Date: 8-Jul-2019 jdw split out from original pipeline +# Supports: fixtures=mysql,mongodb (linux) +# +# Updates: +# 6-Aug-2019 jdw build source and binary wheels by default. 
+#  13-Aug-2019 jdw export config support token prior to launching tox runner
+#
+##
+parameters:
+  tox: ""
+  python: ""
+  os: "linux"
+  fixtures: ""
+
+jobs:
+- job: ${{ format('build_test_{0}_{1}', parameters.tox, parameters.os) }}
+  timeoutInMinutes: 0
+  pool:
+    ${{ if eq(parameters.os, 'macos') }}:
+      vmImage: 'macOS-10.15'
+    ${{ if eq(parameters.os, 'linux') }}:
+      vmImage: 'ubuntu-20.04'
+
+  variables:
+    - group: py-shared-variables
+
+  steps:
+    #
+    # ensure the required Python versions are available
+    - task: UsePythonVersion@0
+      inputs:
+        versionSpec: ${{ parameters.python }}
+        addToPath: true
+      displayName: setup python
+    #
+    - checkout: self
+      submodules: true
+    #
+    - ${{ if startsWith(parameters.os, 'macos') }}:
+      - bash: |
+          set -e
+          ls -la /Applications/Xcode*
+          sudo xcode-select --switch /Applications/Xcode_12.2.app/Contents/Developer
+          which g++
+          c++ --version
+        displayName: "setup Xcode"
+      #
+      - script: which brew
+        displayName: 'Check package manager'
+      - script: brew install flex
+        displayName: 'Install flex'
+      - script: which flex
+        displayName: 'Check flex'
+      - script: brew install bison
+        displayName: 'Install bison'
+      - script: which bison
+        displayName: 'Check bison'
+    # ----------------------------------------------
+    - ${{ if startsWith(parameters.os, 'linux') }}:
+      - script: lsb_release -a
+        displayName: 'Ubuntu kernel version'
+      - script: which apt
+        displayName: 'Check package manager'
+      - script: apt-cache policy | grep http | awk '{print $2 $3}' | sort -u
+        displayName: 'Checking for repos'
+      #
+      - script: sudo apt-get update
+        displayName: 'update apt'
+      #- script: sudo apt-get upgrade
+      #  displayName: 'upgrade apt'
+      #- script: sudo apt-get update
+      #  displayName: 'update apt'
+      - script: sudo apt-get install flex
+        displayName: 'Install flex'
+      - script: sudo apt-get install bison
+        displayName: 'Install bison'
+    #
+    - ${{ if and(contains(parameters.fixtures, 'mysql'), startsWith(parameters.os, 'linux')) }}:
+      - bash: |
+          sudo apt-get install python3-dev mysql-server
+          sudo apt-get install default-libmysqlclient-dev
+          sudo apt-get install python-mysqldb
+          sudo apt list --installed | grep -i mysql
+        displayName: 'Install mysql development libraries'
+      - bash: |
+          echo "Restarting mysql service"
+          sudo systemctl restart mysql.service
+          mysql -V
+          mysql --user=root --password=root -e "use mysql; select * from user;"
+          #
+          echo "Try resetting password"
+          mysqladmin --user=root --password=root password 'ChangeMeSoon'
+          #
+          # mysql -u root -p root -e "SET PASSWORD FOR root@'localhost' = PASSWORD('ChangeMeSoon');"
+          # mysql -u root -p root -e "FLUSH PRIVILEGES; update mysql.user set password=password('ChangeMeSoon') where user='root'; FLUSH PRIVILEGES;"
+          # UPDATE mysql.user SET Password=PASSWORD('ChangeMeSoon') WHERE User='root';
+
+          echo "Running preliminary mysql setup"
+          mysql --user=root --password=ChangeMeSoon <<_EOF_
+          DELETE FROM mysql.user WHERE User='';
+          DELETE FROM mysql.user WHERE User='root' AND Host NOT IN ('localhost', '127.0.0.1', '::1');
+          DROP DATABASE IF EXISTS test;
+          DELETE FROM mysql.db WHERE Db='test' OR Db='test\\_%';
+          FLUSH PRIVILEGES;
+          _EOF_
+          ps -ef | grep -i my
+          mysql --user=root --password=ChangeMeSoon -e "show databases;"
+          #
+        displayName: 'Start and configure mysql ...'
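+      # A minimal credential sanity check could follow here (hypothetical extra
+      # step, not part of this pipeline as committed):
+      # - script: mysql --user=root --password=ChangeMeSoon -e "SELECT VERSION();"
+      #   displayName: 'Verify mysql root credentials'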
+ # ----- + + - ${{ if and(contains(parameters.fixtures, 'mongodb'), startsWith(parameters.os, 'linux')) }}: + # Mongo install + - script: | + sudo apt-get install gnupg wget + wget -qO - https://www.mongodb.org/static/pgp/server-4.2.asc | sudo apt-key add - + # sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 9DA31620334BD75D9DCB49F368818C72E52529D4 + sudo apt list --installed | grep mongodb + echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu bionic/mongodb-org/4.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-4.2.list + # echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu xenial/mongodb-org/4.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-4.2.list + sudo apt-get update + sudo apt-get install -y mongodb-org + sudo apt list --installed | grep mongo + displayName: "Installing mongodb" + # + - script: | + sudo service mongod start + sudo ss -tulpn + displayName: "Start Mongo service" + # + # + - script: "python -c \"import sys; print(sys.version); print(sys.executable)\"" + displayName: show python information + # + - script: python -m pip install --upgrade pip tox + displayName: 'Install tools' + # + - script: pip install -r requirements.txt + displayName: 'Install dependencies' + # + # + - task: DownloadSecureFile@1 + name: oelicense + displayName: 'Download OE license file' + inputs: + secureFile: 'oe_license.txt' + + - ${{ if startsWith(parameters.tox, 'py') }}: + - script: | + export OE_LICENSE=$(oelicense.secureFilePath) + export CONFIG_SUPPORT_TOKEN_ENV=$(VAR_CONFIG_SUPPORT_TOKEN_ENV) + ${{ format('python -m tox -e {0}', parameters.tox) }} + displayName: 'Running tox task' + - ${{ if and(not(startsWith(parameters.tox, 'py')), startsWith(parameters.python, '3.9')) }}: + - script: | + export OE_LICENSE=$(oelicense.secureFilePath) + export CONFIG_SUPPORT_TOKEN_ENV=$(VAR_CONFIG_SUPPORT_TOKEN_ENV) + ${{ format('python -m tox -e {0}-py39', parameters.tox) }} + displayName: 'Running tox task' + - ${{ if and(not(startsWith(parameters.tox, 'py')), startsWith(parameters.python, '3.8')) }}: + - script: | + export OE_LICENSE=$(oelicense.secureFilePath) + export CONFIG_SUPPORT_TOKEN_ENV=$(VAR_CONFIG_SUPPORT_TOKEN_ENV) + ${{ format('python -m tox -e {0}-py38', parameters.tox) }} + displayName: 'Running tox task' + - ${{ if and(not(startsWith(parameters.tox, 'py')), startsWith(parameters.python, '3.7')) }}: + - script: | + export OE_LICENSE=$(oelicense.secureFilePath) + export CONFIG_SUPPORT_TOKEN_ENV=$(VAR_CONFIG_SUPPORT_TOKEN_ENV) + ${{ format('python -m tox -e {0}-py37', parameters.tox) }} + displayName: 'Running tox task' + - ${{ if and(not(startsWith(parameters.tox, 'py')), startsWith(parameters.python, '2.7')) }}: + - script: | + export OE_LICENSE=$(oelicense.secureFilePath) + export CONFIG_SUPPORT_TOKEN_ENV=$(VAR_CONFIG_SUPPORT_TOKEN_ENV) + ${{ format('python -m tox -e {0}-py27', parameters.tox) }} + displayName: 'Runing tox task' + # + # Build artifacts if this is a test target (i.e. 
labeled as py##) + # + - ${{ if startsWith(parameters.tox, 'py') }}: + - script: pip install --upgrade pip twine setuptools wheel + displayName: "Acquire build tools" + - script: python setup.py sdist --dist-dir "$(System.DefaultWorkingDirectory)/dist" + displayName: "Build source dist" + - script: python setup.py bdist_wheel --dist-dir "$(System.DefaultWorkingDirectory)/dist" + displayName: "Build wheel" + # + - script: python setup.py sdist --dist-dir "$(System.DefaultWorkingDirectory)/udist" + displayName: "Build source dist" + # + # Check the install artifacts + - script: ls -lR "$(System.DefaultWorkingDirectory)/dist" "$(System.DefaultWorkingDirectory)/udist" + displayName: "Listing of installed software" + # + - publish: $(System.DefaultWorkingDirectory)/dist + artifact: ${{ format('sw_{0}_{1}', parameters.tox, parameters.os) }} + # + - publish: $(System.DefaultWorkingDirectory)/udist + artifact: ${{ format('sw_u_{0}_{1}', parameters.tox, parameters.os) }} + # diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000..6058ff4 --- /dev/null +++ b/pylintrc @@ -0,0 +1,588 @@ +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-whitelist=MySQLdb,rdkit + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Specify a configuration file. +#rcfile= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". 
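+# For example, a one-off run of only the similarities checker from the command
+# line (illustrative invocation; the list below is what this project disables):
+#   pylint --disable=all --enable=similarities rcsb/utils/dictionary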
+disable=missing-docstring, + empty-docstring, + bad-continuation, + print-statement, + parameter-unpacking, + unpacking-in-except, + old-raise-syntax, + backtick, + long-suffix, + old-ne-operator, + old-octal-literal, + import-star-module-level, + non-ascii-bytes-literal, + raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + broad-except, + apply-builtin, + basestring-builtin, + buffer-builtin, + cmp-builtin, + coerce-builtin, + execfile-builtin, + file-builtin, + long-builtin, + raw_input-builtin, + reduce-builtin, + standarderror-builtin, + unicode-builtin, + xrange-builtin, + coerce-method, + delslice-method, + getslice-method, + setslice-method, + no-absolute-import, + old-division, + dict-iter-method, + dict-view-method, + next-method-called, + metaclass-assignment, + indexing-exception, + raising-string, + reload-builtin, + oct-method, + hex-method, + nonzero-method, + cmp-method, + input-builtin, + round-builtin, + intern-builtin, + unichr-builtin, + map-builtin-not-iterating, + zip-builtin-not-iterating, + range-builtin-not-iterating, + filter-builtin-not-iterating, + using-cmp-argument, + eq-without-hash, + div-method, + idiv-method, + rdiv-method, + exception-message-attribute, + invalid-str-codec, + sys-max-int, + bad-python3-import, + deprecated-string-function, + deprecated-str-translate-call, + deprecated-itertools-function, + deprecated-types-field, + next-method-defined, + dict-items-not-iterating, + dict-keys-not-iterating, + dict-values-not-iterating, + deprecated-operator-function, + deprecated-urllib-function, + xreadlines-attribute, + deprecated-sys-function, + exception-escape, + comprehension-escape + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit + + +[LOGGING] + +# Format style used to check logging format string. 
`old` means using % +# formatting, while `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package.. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. 
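+# (with 'yes', e.g., a module-level name that is assigned only for export is
+# not reported as unused)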
+allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=185 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma, + dict-separator + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +argument-rgx=[a-z_][a-zA-Z0-9]{1,30}$ + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. +attr-rgx=_?_?[a-z][A-Za-z0-9]{1,40}$ + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Naming style matching correct class attribute names. +class-attribute-naming-style=camelCase + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +class-attribute-rgx=_?_?[a-z][A-Za-z0-9]{1,40}$ + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=any + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. 
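+# (-1 disables the length exemption, so docstring checks apply regardless of size)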
+docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=camelCase + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=_, + i, + j, + k, + v, + ii, + jj, + kk, +# t, +# c, +# d, + e, +# r, +# s, +# v, +# p, +# ts, +# tS, + ok, + logger + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. +method-rgx=_?_?[a-z][A-Za-z0-9]{1,40}_?_?$ + +# Naming style matching correct module names. +module-naming-style=any + +# Regular expression matching correct module names. Overrides module-naming- +# style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +variable-rgx=[a-z_][a-zA-Z0-9]{1,40}$ + + +[STRING] + +# This flag controls whether the implicit-str-concat-in-sequence should +# generate a warning on implicit string concatenation in sequences defined over +# several lines. +check-str-concat-over-line-jumps=no + + +[IMPORTS] + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules=optparse,tkinter.tix + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled). +ext-import-graph= + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled). +import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp + +# List of member names, which should be excluded from the protected access +# warning. 
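+# (the default exclusions below are namedtuple internals that are legitimately
+# accessed from outside the class)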
+exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement. +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "BaseException, Exception". +overgeneral-exceptions=BaseException, + Exception diff --git a/rcsb/__init__.py b/rcsb/__init__.py new file mode 100644 index 0000000..8db66d3 --- /dev/null +++ b/rcsb/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/rcsb/mock-data b/rcsb/mock-data new file mode 160000 index 0000000..8bfc542 --- /dev/null +++ b/rcsb/mock-data @@ -0,0 +1 @@ +Subproject commit 8bfc542c445225d48d6f98f6b5f6099caf48dc46 diff --git a/rcsb/utils/__init__.py b/rcsb/utils/__init__.py new file mode 100644 index 0000000..8db66d3 --- /dev/null +++ b/rcsb/utils/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/rcsb/utils/dictionary/DictMethodAssemblyHelper.py b/rcsb/utils/dictionary/DictMethodAssemblyHelper.py new file mode 100644 index 0000000..0f590f3 --- /dev/null +++ b/rcsb/utils/dictionary/DictMethodAssemblyHelper.py @@ -0,0 +1,746 @@ +## +# File: DictMethodAssemblyHelper.py +# Author: J. Westbrook +# Date: 16-Jul-2019 +# Version: 0.001 Initial version +# +## +""" +Helper class implementing external assembly-level methods supporting the RCSB dictionary extension. + +""" +__docformat__ = "restructuredtext en" +__author__ = "John Westbrook" +__email__ = "jwest@rcsb.rutgers.edu" +__license__ = "Apache 2.0" + +import logging +import re +from collections import Counter + +from mmcif.api.DataCategory import DataCategory + +logger = logging.getLogger(__name__) + + +def cmpElements(lhs, rhs): + return 0 if (lhs[-1].isdigit() or lhs[-1] in ["R", "S"]) and rhs[0].isdigit() else -1 + + +class DictMethodAssemblyHelper(object): + """Helper class implementing external assembly-level methods supporting the RCSB dictionary extension.""" + + def __init__(self, **kwargs): + """ + Args: + **kwargs: (dict) Placeholder for future key-value arguments + + """ + # + self._raiseExceptions = kwargs.get("raiseExceptions", False) + # + rP = kwargs.get("resourceProvider") + self.__commonU = rP.getResource("DictMethodCommonUtils instance") if rP else None + self.__dApi = rP.getResource("Dictionary API instance (pdbx_core)") if rP else None + # + logger.debug("Dictionary method helper init") + + def echo(self, msg): + logger.info(msg) + + def addAssemblyInfo(self, dataContainer, catName, **kwargs): + """Build rcsb_assembly_info category. 
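+
+        Aggregates the per-assembly atom, monomer, instance, and entity counts
+        computed by __getAssemblyComposition() into one summary row per assembly.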
+ + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + """ + logger.debug("Starting catName %s kwargs %r", catName, kwargs) + try: + if not (dataContainer.exists("entry") and dataContainer.exists("pdbx_struct_assembly")): + return False + logger.debug("%s beginning for %s", dataContainer.getName(), catName) + # Create the new target category rcsb_assembly_info + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + # + logger.debug("%s beginning for %s", dataContainer.getName(), catName) + # + # Get assembly comp details - + # + rD = self.__getAssemblyComposition(dataContainer) + # + cObj = dataContainer.getObj(catName) + + tObj = dataContainer.getObj("entry") + entryId = tObj.getValue("id", 0) + # + tObj = dataContainer.getObj("pdbx_struct_assembly") + assemblyIdL = tObj.getAttributeValueList("id") + # + # + for ii, assemblyId in enumerate(assemblyIdL): + if assemblyId not in rD["assemblyHeavyAtomCountByTypeD"]: + continue + if assemblyId not in rD["assemblyHeavyAtomCountD"]: + continue + dD = rD["assemblyHeavyAtomCountByTypeD"][assemblyId] + # + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(assemblyId, "assembly_id", ii) + # + + num = dD["polymer"] if "polymer" in dD else 0 + cObj.setValue(num, "polymer_atom_count", ii) + + num = dD["non-polymer"] if "non-polymer" in dD else 0 + cObj.setValue(num, "nonpolymer_atom_count", ii) + + num = dD["water"] if "water" in dD else 0 + cObj.setValue(num, "solvent_atom_count", ii) + + num = dD["branched"] if "branched" in dD else 0 + cObj.setValue(num, "branched_atom_count", ii) + + num = rD["assemblyHeavyAtomCountD"][assemblyId] + cObj.setValue(num, "atom_count", ii) + # + num = rD["assemblyHydrogenAtomCountD"][assemblyId] + cObj.setValue(num, "hydrogen_atom_count", ii) + # + num1 = rD["assemblyModeledMonomerCountD"][assemblyId] + num2 = rD["assemblyUnmodeledMonomerCountD"][assemblyId] + cObj.setValue(num1, "modeled_polymer_monomer_count", ii) + cObj.setValue(num2, "unmodeled_polymer_monomer_count", ii) + cObj.setValue(num1 + num2, "polymer_monomer_count", ii) + # + dD = rD["assemblyPolymerClassD"][assemblyId] + cObj.setValue(dD["polymerCompClass"], "polymer_composition", ii) + cObj.setValue(dD["subsetCompClass"], "selected_polymer_entity_types", ii) + cObj.setValue(dD["naCompClass"], "na_polymer_entity_types", ii) + # + dD = rD["assemblyInstanceCountByTypeD"][assemblyId] + num = dD["polymer"] if "polymer" in dD else 0 + cObj.setValue(num, "polymer_entity_instance_count", ii) + # + num = dD["non-polymer"] if "non-polymer" in dD else 0 + cObj.setValue(num, "nonpolymer_entity_instance_count", ii) + # + num = dD["branched"] if "branched" in dD else 0 + cObj.setValue(num, "branched_entity_instance_count", ii) + # + num = dD["water"] if "water" in dD else 0 + cObj.setValue(num, "solvent_entity_instance_count", ii) + # + dD = rD["assemblyInstanceCountByPolymerTypeD"][assemblyId] + num = dD["Protein"] if "Protein" in dD else 0 + cObj.setValue(num, "polymer_entity_instance_count_protein", ii) + num1 = dD["DNA"] if "DNA" in dD else 0 + cObj.setValue(num1, "polymer_entity_instance_count_DNA", ii) + num2 = dD["RNA"] if "RNA" in dD else 0 + cObj.setValue(num2, "polymer_entity_instance_count_RNA", ii) + cObj.setValue(num1 + num2, "polymer_entity_instance_count_nucleic_acid", ii) + num = dD["NA-hybrid"] if "NA-hybrid" in dD else 0 + 
cObj.setValue(num, "polymer_entity_instance_count_nucleic_acid_hybrid", ii) + # + dD = rD["assemblyEntityCountByPolymerTypeD"][assemblyId] + num = dD["Protein"] if "Protein" in dD else 0 + cObj.setValue(num, "polymer_entity_count_protein", ii) + num1 = dD["DNA"] if "DNA" in dD else 0 + cObj.setValue(num1, "polymer_entity_count_DNA", ii) + num2 = dD["RNA"] if "RNA" in dD else 0 + cObj.setValue(num2, "polymer_entity_count_RNA", ii) + cObj.setValue(num1 + num2, "polymer_entity_count_nucleic_acid", ii) + num = dD["NA-hybrid"] if "NA-hybrid" in dD else 0 + cObj.setValue(num, "polymer_entity_count_nucleic_acid_hybrid", ii) + # + dD = rD["assemblyEntityCountByTypeD"][assemblyId] + num = dD["polymer"] if "polymer" in dD else 0 + cObj.setValue(num, "polymer_entity_count", ii) + # + num = dD["non-polymer"] if "non-polymer" in dD else 0 + cObj.setValue(num, "nonpolymer_entity_count", ii) + # + num = dD["branched"] if "branched" in dD else 0 + cObj.setValue(num, "branched_entity_count", ii) + # + num = dD["water"] if "water" in dD else 0 + cObj.setValue(num, "solvent_entity_count", ii) + # + return + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def buildContainerAssemblyIds(self, dataContainer, catName, **kwargs): + """Build category rcsb_assembly_container_identifiers. + + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For example, + + loop_ + _rcsb_assembly_container_identifiers.entry_id + _rcsb_assembly_container_identifiers.assembly_id + ... + + + """ + logger.debug("Starting catName %s kwargs %r", catName, kwargs) + try: + if not (dataContainer.exists("entry") and dataContainer.exists("pdbx_struct_assembly")): + return False + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + cObj = dataContainer.getObj(catName) + + tObj = dataContainer.getObj("entry") + entryId = tObj.getValue("id", 0) + cObj.setValue(entryId, "entry_id", 0) + # + tObj = dataContainer.getObj("pdbx_struct_assembly") + assemblyIdL = tObj.getAttributeValueList("id") + for ii, assemblyId in enumerate(assemblyIdL): + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(assemblyId, "assembly_id", ii) + cObj.setValue(entryId + "-" + assemblyId, "rcsb_id", ii) + + # + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def addDepositedAssembly(self, dataContainer, catName, **kwargs): + """Add the deposited coordinates as an additional separate assembly labeled as 'deposited' + to categories, pdbx_struct_assembly and pdb_struct_assembly_gen. 
+ + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + """ + logger.debug("Starting catName %s kwargs %r", catName, kwargs) + try: + if not dataContainer.exists("struct_asym"): + return False + if not dataContainer.exists("pdbx_struct_assembly"): + dataContainer.append( + DataCategory( + "pdbx_struct_assembly", + attributeNameList=["id", "details", "method_details", "oligomeric_details", "oligomeric_count", "rcsb_details", "rcsb_candidate_assembly"], + ) + ) + if not dataContainer.exists("pdbx_struct_assembly_gen"): + dataContainer.append(DataCategory("pdbx_struct_assembly_gen", attributeNameList=["assembly_id", "oper_expression", "asym_id_list", "ordinal"])) + + if not dataContainer.exists("pdbx_struct_oper_list"): + row = [ + "1", + "identity operation", + "1_555", + "x, y, z", + "1.0000000000", + "0.0000000000", + "0.0000000000", + "0.0000000000", + "0.0000000000", + "1.0000000000", + "0.0000000000", + "0.0000000000", + "0.0000000000", + "0.0000000000", + "1.0000000000", + "0.0000000000", + ] + atList = [ + "id", + "type", + "name", + "symmetry_operation", + "matrix[1][1]", + "matrix[1][2]", + "matrix[1][3]", + "vector[1]", + "matrix[2][1]", + "matrix[2][2]", + "matrix[2][3]", + "vector[2]", + "matrix[3][1]", + "matrix[3][2]", + "matrix[3][3]", + "vector[3]", + ] + dataContainer.append(DataCategory("pdbx_struct_oper_list", attributeNameList=atList, rowList=[row])) + + # + logger.debug("Add deposited assembly for %s", dataContainer.getName()) + cObj = dataContainer.getObj("struct_asym") + asymIdL = cObj.getAttributeValueList("id") + logger.debug("AsymIdL %r", asymIdL) + # + # Ordinal is added by subsequent attribure-level method. + tObj = dataContainer.getObj("pdbx_struct_assembly_gen") + rowIdx = tObj.getRowCount() + tObj.setValue("deposited", "assembly_id", rowIdx) + tObj.setValue("1", "oper_expression", rowIdx) + tObj.setValue(",".join(asymIdL), "asym_id_list", rowIdx) + # + tObj = dataContainer.getObj("pdbx_struct_assembly") + rowIdx = tObj.getRowCount() + tObj.setValue("deposited", "id", rowIdx) + tObj.setValue("deposited_coordinates", "details", rowIdx) + # + for atName in ["oligomeric_details", "method_details", "oligomeric_count"]: + if tObj.hasAttribute(atName): + tObj.setValue("?", atName, rowIdx) + # + # + # + logger.debug("Full row is %r", tObj.getRow(rowIdx)) + # + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def filterAssemblyDetails(self, dataContainer, catName, **kwargs): + """Filter _pdbx_struct_assembly.details -> _pdbx_struct_assembly.rcsb_details + with a more limited vocabulary - + + + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For example, mapping to the following limited enumeration, + + 'author_and_software_defined_assembly' + 'author_defined_assembly' + 'software_defined_assembly' + + """ + logger.debug("Starting catName %s kwargs %r", catName, kwargs) + mD = { + "author_and_software_defined_assembly": "author_and_software_defined_assembly", + "author_defined_assembly": "author_defined_assembly", + "complete icosahedral assembly": "author_and_software_defined_assembly", + "complete point assembly": "author_and_software_defined_assembly", + "crystal asymmetric unit": "software_defined_assembly", + "crystal asymmetric unit, crystal frame": 
"software_defined_assembly", + "details": "software_defined_assembly", + "helical asymmetric unit": "software_defined_assembly", + "helical asymmetric unit, std helical frame": "software_defined_assembly", + "icosahedral 23 hexamer": "software_defined_assembly", + "icosahedral asymmetric unit": "software_defined_assembly", + "icosahedral asymmetric unit, std point frame": "software_defined_assembly", + "icosahedral pentamer": "software_defined_assembly", + "pentasymmetron capsid unit": "software_defined_assembly", + "point asymmetric unit": "software_defined_assembly", + "point asymmetric unit, std point frame": "software_defined_assembly", + "representative helical assembly": "author_and_software_defined_assembly", + "software_defined_assembly": "software_defined_assembly", + "trisymmetron capsid unit": "software_defined_assembly", + "deposited_coordinates": "software_defined_assembly", + } + # + try: + if not dataContainer.exists("pdbx_struct_assembly"): + return False + + logger.debug("Filter assembly details for %s", dataContainer.getName()) + tObj = dataContainer.getObj("pdbx_struct_assembly") + atName = "rcsb_details" + if not tObj.hasAttribute(atName): + tObj.appendAttribute(atName) + # + for iRow in range(tObj.getRowCount()): + details = tObj.getValue("details", iRow) + if details in mD: + tObj.setValue(mD[details], "rcsb_details", iRow) + else: + tObj.setValue("software_defined_assembly", "rcsb_details", iRow) + # logger.debug("Full row is %r", tObj.getRow(iRow)) + return True + except Exception as e: + logger.exception("For %s %s failing with %s", catName, atName, str(e)) + return False + + def assignAssemblyCandidates(self, dataContainer, catName, **kwargs): + """Flag candidate biological assemblies as 'author_defined_assembly' ad author_and_software_defined_assembly' + + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + """ + logger.debug("Starting catName %s kwargs %r", catName, kwargs) + mD = { + "author_and_software_defined_assembly": "author_and_software_defined_assembly", + "author_defined_assembly": "author_defined_assembly", + "complete icosahedral assembly": "author_and_software_defined_assembly", + "complete point assembly": "author_and_software_defined_assembly", + "crystal asymmetric unit": "software_defined_assembly", + "crystal asymmetric unit, crystal frame": "software_defined_assembly", + "details": "software_defined_assembly", + "helical asymmetric unit": "software_defined_assembly", + "helical asymmetric unit, std helical frame": "software_defined_assembly", + "icosahedral 23 hexamer": "software_defined_assembly", + "icosahedral asymmetric unit": "software_defined_assembly", + "icosahedral asymmetric unit, std point frame": "software_defined_assembly", + "icosahedral pentamer": "software_defined_assembly", + "pentasymmetron capsid unit": "software_defined_assembly", + "point asymmetric unit": "software_defined_assembly", + "point asymmetric unit, std point frame": "software_defined_assembly", + "representative helical assembly": "author_and_software_defined_assembly", + "software_defined_assembly": "software_defined_assembly", + "trisymmetron capsid unit": "software_defined_assembly", + "deposited_coordinates": "software_defined_assembly", + } + # + eD = { + k: True + for k in [ + "crystal asymmetric unit", + "crystal asymmetric unit, crystal frame", + "helical asymmetric unit", + "helical asymmetric unit, std helical frame", + "icosahedral 23 
hexamer", + "icosahedral asymmetric unit", + "icosahedral asymmetric unit, std point frame", + "icosahedral pentamer", + "pentasymmetron capsid unit", + "point asymmetric unit", + "point asymmetric unit, std point frame", + "trisymmetron capsid unit", + "deposited_coordinates", + "details", + ] + } + try: + if not dataContainer.exists("pdbx_struct_assembly"): + return False + atName = "rcsb_candidate_assembly" + tObj = dataContainer.getObj("pdbx_struct_assembly") + if not tObj.hasAttribute(atName): + tObj.appendAttribute(atName) + # + for iRow in range(tObj.getRowCount()): + details = tObj.getValue("details", iRow) + if details in mD and details not in eD: + tObj.setValue("Y", "rcsb_candidate_assembly", iRow) + else: + tObj.setValue("N", "rcsb_candidate_assembly", iRow) + # logger.debug("Full row is %r", tObj.getRow(iRow)) + + # + return True + except Exception as e: + logger.exception("For %s %s failing with %s", catName, atName, str(e)) + return False + + def filterAssemblyCandidates(self, dataContainer, catName, **kwargs): + """Filter assemblies to only candidates and deposited cases + + + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + + """ + logger.debug("Starting catName %s kwargs %r", catName, kwargs) + try: + if not dataContainer.exists("pdbx_struct_assembly"): + return False + + logger.debug("Filter candidate assemblyfor %s", dataContainer.getName()) + tObj = dataContainer.getObj("pdbx_struct_assembly") + # + indexList = [] + for iRow in range(tObj.getRowCount()): + isCandidate = tObj.getValue("rcsb_candidate_assembly", iRow) == "Y" + isDeposited = tObj.getValue("id", iRow) == "deposited" + + if not (isCandidate or isDeposited): + indexList.append(iRow) + tObj.removeRows(indexList) + # + # --- + numAssemblies = tObj.getRowCount() + logger.debug("Assembly count is %d", numAssemblies) + if dataContainer.exists("rcsb_entry_info"): + eiObj = dataContainer.getObj("rcsb_entry_info") + eiObj.setValue(numAssemblies, "assembly_count", 0) + # + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def __expandOperatorList(self, operExpression): + """ + Operation expressions may have the forms: + + (1) the single operation 1 + (1,2,5) the operations 1, 2, 5 + (1-4) the operations 1,2,3 and 4 + (1,2)(3,4) the combinations of operations + 3 and 4 followed by 1 and 2 (i.e. + the cartesian product of parenthetical + groups applied from right to left) + """ + + rL = [] + opCount = 1 + try: + if operExpression.find("(") < 0: + opL = [operExpression] + else: + opL = [tV.strip().strip("(").rstrip(")") for tV in re.findall(r"\(.*?\)", operExpression)] + # + for op in opL: + teL = [] + tL = op.split(",") + for tV in tL: + trngL = tV.split("-") + if len(trngL) == 2: + rngL = [str(r) for r in range(int(trngL[0]), int(trngL[1]) + 1)] + else: + rngL = trngL + teL.extend(rngL) + rL.append(teL) + opCount *= len(teL) + + except Exception as e: + logger.exception("Failing parsing %r with %s", operExpression, str(e)) + # + if not rL: + opCount = 0 + return opCount, rL + + def __getAssemblyComposition(self, dataContainer): + """Return assembly composition by entity and instance type counts. + + Example - + loop_ + _pdbx_struct_assembly.id + _pdbx_struct_assembly.details + _pdbx_struct_assembly.method_details + _pdbx_struct_assembly.oligomeric_details + _pdbx_struct_assembly.oligomeric_count + 1 'complete icosahedral assembly' ? 
180-meric 180 + 2 'icosahedral asymmetric unit' ? trimeric 3 + 3 'icosahedral pentamer' ? pentadecameric 15 + 4 'icosahedral 23 hexamer' ? octadecameric 18 + 5 'icosahedral asymmetric unit, std point frame' ? trimeric 3 + # + loop_ + _pdbx_struct_assembly_gen.assembly_id + _pdbx_struct_assembly_gen.oper_expression + _pdbx_struct_assembly_gen.asym_id_list + 1 '(1-60)' A,B,C + 2 1 A,B,C + 3 '(1-5)' A,B,C + 4 '(1,2,6,10,23,24)' A,B,C + 5 P A,B,C + # + """ + # + instanceTypeD = self.__commonU.getInstanceTypes(dataContainer) + instancePolymerTypeD = self.__commonU.getInstancePolymerTypes(dataContainer) + instEntityD = self.__commonU.getInstanceEntityMap(dataContainer) + # + epTypeD = self.__commonU.getEntityPolymerTypes(dataContainer) + eTypeD = self.__commonU.getEntityTypes(dataContainer) + epTypeFilteredD = self.__commonU.getPolymerEntityFilteredTypes(dataContainer) + # JDW + instHeavyAtomCount = self.__commonU.getInstanceHeavyAtomCounts(dataContainer, modelId="1") + instHydrogenAtomCount = self.__commonU.getInstanceHydrogenAtomCounts(dataContainer, modelId="1") + # + instModeledMonomerCount = self.__commonU.getInstanceModeledMonomerCounts(dataContainer, modelId="1") + instUnmodeledMonomerCount = self.__commonU.getInstanceUnModeledMonomerCounts(dataContainer, modelId="1") + # ------------------------- + assemblyInstanceCountByTypeD = {} + assemblyHeavyAtomCountByTypeD = {} + assemblyHeavyAtomCountD = {} + assemblyHydrogenAtomCountD = {} + assemblyModeledMonomerCountD = {} + assemblyUnmodeledMonomerCountD = {} + # Pre-generation (source instances) + assemblyInstanceD = {} + # Post-generation (gerated instances) + assemblyInstanceGenD = {} + assemblyInstanceCountByPolymerTypeD = {} + assemblyPolymerInstanceCountD = {} + assemblyPolymerClassD = {} + # + assemblyEntityCountByPolymerTypeD = {} + assemblyEntityCountByTypeD = {} + # -------------- + # + try: + if dataContainer.exists("pdbx_struct_assembly_gen"): + tObj = dataContainer.getObj("pdbx_struct_assembly_gen") + for ii in range(tObj.getRowCount()): + assemblyId = tObj.getValue("assembly_id", ii) + # Initialize instances count + if assemblyId not in assemblyInstanceCountByTypeD: + assemblyInstanceCountByTypeD[assemblyId] = {eType: 0 for eType in ["polymer", "non-polymer", "branched", "macrolide", "water"]} + if assemblyId not in assemblyHeavyAtomCountByTypeD: + assemblyHeavyAtomCountByTypeD[assemblyId] = {eType: 0 for eType in ["polymer", "non-polymer", "branched", "macrolide", "water"]} + if assemblyId not in assemblyModeledMonomerCountD: + assemblyModeledMonomerCountD[assemblyId] = 0 + if assemblyId not in assemblyUnmodeledMonomerCountD: + assemblyUnmodeledMonomerCountD[assemblyId] = 0 + if assemblyId not in assemblyHeavyAtomCountD: + assemblyHeavyAtomCountD[assemblyId] = 0 + if assemblyId not in assemblyHydrogenAtomCountD: + assemblyHydrogenAtomCountD[assemblyId] = 0 + # + opExpression = tObj.getValue("oper_expression", ii) + opCount, opL = self.__expandOperatorList(opExpression) + tS = tObj.getValue("asym_id_list", ii) + asymIdList = [t.strip() for t in tS.strip().split(",")] + assemblyInstanceD.setdefault(assemblyId, []).extend(asymIdList) + assemblyInstanceGenD.setdefault(assemblyId, []).extend(asymIdList * opCount) + # + logger.debug("%s assembly %r opExpression %r opCount %d opL %r", dataContainer.getName(), assemblyId, opExpression, opCount, opL) + logger.debug("%s assembly %r length asymIdList %r", dataContainer.getName(), assemblyId, len(asymIdList)) + # + for eType in ["polymer", "non-polymer", "branched", "macrolide", "water"]: 
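+                        # Scale each per-instance count by opCount: every symmetry
+                        # operation expanded from oper_expression replicates the
+                        # listed asym ids in the generated assembly.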
+ iList = [asymId for asymId in asymIdList if asymId in instanceTypeD and instanceTypeD[asymId] == eType] + assemblyInstanceCountByTypeD[assemblyId][eType] += len(iList) * opCount + # + atCountList = [ + instHeavyAtomCount[asymId] for asymId in asymIdList if asymId in instanceTypeD and instanceTypeD[asymId] == eType and asymId in instHeavyAtomCount + ] + assemblyHeavyAtomCountByTypeD[assemblyId][eType] += sum(atCountList) * opCount + assemblyHeavyAtomCountD[assemblyId] += sum(atCountList) * opCount + # + hAtCountList = [ + instHydrogenAtomCount[asymId] for asymId in asymIdList if asymId in instanceTypeD and instanceTypeD[asymId] == eType and asymId in instHydrogenAtomCount + ] + assemblyHydrogenAtomCountD[assemblyId] += sum(hAtCountList) * opCount + # + modeledMonomerCountList = [ + instModeledMonomerCount[asymId] + for asymId in asymIdList + if asymId in instanceTypeD and instanceTypeD[asymId] == "polymer" and asymId in instModeledMonomerCount + ] + assemblyModeledMonomerCountD[assemblyId] += sum(modeledMonomerCountList) * opCount + # + unmodeledMonomerCountList = [ + instUnmodeledMonomerCount[asymId] + for asymId in asymIdList + if asymId in instanceTypeD and instanceTypeD[asymId] == "polymer" and asymId in instUnmodeledMonomerCount + ] + assemblyUnmodeledMonomerCountD[assemblyId] += sum(unmodeledMonomerCountList) * opCount + + # + assemblyInstanceCountByPolymerTypeD = {} + assemblyPolymerInstanceCountD = {} + assemblyPolymerClassD = {} + # + assemblyEntityCountByPolymerTypeD = {} + assemblyEntityCountByTypeD = {} + # + # Using the generated list of instance assembly components ... + for assemblyId, asymIdList in assemblyInstanceGenD.items(): + # ------ + # Instance polymer composition + pInstTypeList = [instancePolymerTypeD[asymId] for asymId in asymIdList if asymId in instancePolymerTypeD] + pInstTypeD = Counter(pInstTypeList) + assemblyInstanceCountByPolymerTypeD[assemblyId] = {pType: 0 for pType in ["Protein", "DNA", "RNA", "NA-hybrid", "Other"]} + assemblyInstanceCountByPolymerTypeD[assemblyId] = {pType: pInstTypeD[pType] for pType in ["Protein", "DNA", "RNA", "NA-hybrid", "Other"] if pType in pInstTypeD} + assemblyPolymerInstanceCountD[assemblyId] = len(pInstTypeList) + # + logger.debug("%s assemblyId %r pInstTypeD %r", dataContainer.getName(), assemblyId, pInstTypeD.items()) + + # ------------- + # Entity and polymer entity composition + # + entityIdList = list(set([instEntityD[asymId] for asymId in asymIdList if asymId in instEntityD])) + pTypeL = [epTypeD[entityId] for entityId in entityIdList if entityId in epTypeD] + # + polymerCompClass, subsetCompClass, naCompClass, _ = self.__commonU.getPolymerComposition(pTypeL) + assemblyPolymerClassD[assemblyId] = {"polymerCompClass": polymerCompClass, "subsetCompClass": subsetCompClass, "naCompClass": naCompClass} + # + logger.debug( + "%s assemblyId %s polymerCompClass %r subsetCompClass %r naCompClass %r pTypeL %r", + dataContainer.getName(), + assemblyId, + polymerCompClass, + subsetCompClass, + naCompClass, + pTypeL, + ) + pTypeFilteredL = [epTypeFilteredD[entityId] for entityId in entityIdList if entityId in epTypeFilteredD] + # + pEntityTypeD = Counter(pTypeFilteredL) + assemblyEntityCountByPolymerTypeD[assemblyId] = {pType: 0 for pType in ["Protein", "DNA", "RNA", "NA-hybrid", "Other"]} + assemblyEntityCountByPolymerTypeD[assemblyId] = { + pType: pEntityTypeD[pType] for pType in ["Protein", "DNA", "RNA", "NA-hybrid", "Other"] if pType in pEntityTypeD + } + # + eTypeL = [eTypeD[entityId] for entityId in entityIdList if entityId in 
eTypeD] + entityTypeD = Counter(eTypeL) + assemblyEntityCountByTypeD[assemblyId] = {eType: 0 for eType in ["polymer", "non-polymer", "branched", "macrolide", "water"]} + assemblyEntityCountByTypeD[assemblyId].update( + {eType: entityTypeD[eType] for eType in ["polymer", "non-polymer", "branched", "macrolide", "water"] if eType in entityTypeD} + ) + # + # --------------- + # + # + logger.debug("%s assemblyInstanceCountByTypeD %r", dataContainer.getName(), assemblyInstanceCountByTypeD.items()) + logger.debug("%s assemblyHeavyAtomCountByTypeD %r", dataContainer.getName(), assemblyHeavyAtomCountByTypeD.items()) + logger.debug("%s assemblyHeavyAtomCountD %r", dataContainer.getName(), assemblyHeavyAtomCountD.items()) + logger.debug("%s assemblyHydrogenAtomCountD %r", dataContainer.getName(), assemblyHydrogenAtomCountD.items()) + logger.debug("%s assemblyModeledMonomerCountD %r", dataContainer.getName(), assemblyModeledMonomerCountD.items()) + logger.debug("%s assemblyUnmodeledMonomerCountD %r", dataContainer.getName(), assemblyUnmodeledMonomerCountD.items()) + logger.debug("%s assemblyPolymerClassD %r", dataContainer.getName(), assemblyPolymerClassD.items()) + logger.debug("%s assemblyPolymerInstanceCountD %r", dataContainer.getName(), assemblyPolymerInstanceCountD.items()) + logger.debug("%s assemblyInstanceCountByPolymerTypeD %r", dataContainer.getName(), assemblyInstanceCountByPolymerTypeD.items()) + logger.debug("%s assemblyEntityCountByPolymerTypeD %r", dataContainer.getName(), assemblyEntityCountByPolymerTypeD.items()) + logger.debug("%s assemblyEntityCountByTypeD %r", dataContainer.getName(), assemblyEntityCountByTypeD.items()) + # + rD = { + "assemblyInstanceCountByTypeD": assemblyInstanceCountByTypeD, + "assemblyHeavyAtomCountByTypeD": assemblyHeavyAtomCountByTypeD, + "assemblyHeavyAtomCountD": assemblyHeavyAtomCountD, + "assemblyHydrogenAtomCountD": assemblyHydrogenAtomCountD, + "assemblyModeledMonomerCountD": assemblyModeledMonomerCountD, + "assemblyUnmodeledMonomerCountD": assemblyUnmodeledMonomerCountD, + "assemblyInstanceCountByPolymerTypeD": assemblyInstanceCountByPolymerTypeD, + "assemblyPolymerInstanceCountD": assemblyPolymerInstanceCountD, + "assemblyPolymerClassD": assemblyPolymerClassD, + "assemblyEntityCountByPolymerTypeD": assemblyEntityCountByPolymerTypeD, + "assemblyEntityCountByTypeD": assemblyEntityCountByTypeD, + } + except Exception as e: + logger.exception("Failing %s with %s", dataContainer.getName(), str(e)) + return rD diff --git a/rcsb/utils/dictionary/DictMethodChemRefHelper.py b/rcsb/utils/dictionary/DictMethodChemRefHelper.py new file mode 100644 index 0000000..3d7792d --- /dev/null +++ b/rcsb/utils/dictionary/DictMethodChemRefHelper.py @@ -0,0 +1,937 @@ +## +# File: DictMethodChemRefHelper.py +# Author: J. Westbrook +# Date: 16-Jul-2019 +# Version: 0.001 Initial version +# +## +""" +Helper class implements external method references supporting chemical +reference data definitions in the RCSB dictionary extension. +""" +__docformat__ = "restructuredtext en" +__author__ = "John Westbrook" +__email__ = "jwest@rcsb.rutgers.edu" +__license__ = "Apache 2.0" + +import logging + +# from collections import Counter, OrderedDict + +from mmcif.api.DataCategory import DataCategory + +logger = logging.getLogger(__name__) + + +class DictMethodChemRefHelper(object): + """Helper class implements external method references supporting chemical + reference data definitions in the RCSB dictionary extension.
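+ + These methods are normally dispatched by name through DictMethodRunner rather than called directly. A minimal sketch of direct use, assuming a configured DictMethodResourceProvider instance rP and a chemical component dataContainer, might look like: + + helper = DictMethodChemRefHelper(resourceProvider=rP) + ok = helper.addChemCompInfo(dataContainer, "rcsb_chem_comp_info")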
+ """ + + def __init__(self, **kwargs): + """ + Args: + resourceProvider: (obj) instance of DictMethodResourceProvider() + + """ + # + self._raiseExceptions = kwargs.get("raiseExceptions", False) + # + rP = kwargs.get("resourceProvider") + self.__dApi = rP.getResource("Dictionary API instance (pdbx_core)") if rP else None + logger.debug("Dictionary method helper init") + + def echo(self, msg): + logger.info(msg) + + def addChemCompRelated(self, dataContainer, catName, **kwargs): + """Add category rcsb_chem_comp_related. + + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For example, + + loop_ + _rcsb_chem_comp_related.comp_id + _rcsb_chem_comp_related.ordinal + _rcsb_chem_comp_related.resource_name + _rcsb_chem_comp_related.resource_accession_code + _rcsb_chem_comp_related.related_mapping_method + ATP 1 DrugBank DB00171 'assigned by resource' + """ + try: + logger.debug("Starting with %r %r", dataContainer.getName(), catName) + if not (dataContainer.exists("chem_comp_atom") and dataContainer.exists("chem_comp_bond")): + return False + rP = kwargs.get("resourceProvider") + # ------- new + ccId = self.__getChemCompId(dataContainer) + dbId, atcIdL, mappingType, dbVersion = self.__getDrugBankMapping(dataContainer, rP) + logger.debug("Using DrugBank version %r", dbVersion) + # ------------ ----------------------- ----------------------- ----------------------- ----------- + if dbId: + # + if dataContainer.exists("rcsb_chem_comp_container_identifiers"): + tObj = dataContainer.getObj("rcsb_chem_comp_container_identifiers") + if not tObj.hasAttribute("drugbank_id"): + tObj.appendAttribute("drugbank_id") + tObj.setValue(dbId, "drugbank_id", 0) + if atcIdL: + if not tObj.hasAttribute("atc_codes"): + tObj.appendAttribute("atc_codes") + tObj.setValue(",".join(atcIdL), "atc_codes", 0) + # + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + wObj = dataContainer.getObj(catName) + rL = wObj.selectIndices("DrugBank", "resource_name") + ok = False + if rL: + ok = wObj.removeRows(rL) + if not ok: + logger.debug("Error removing rows in %r %r", catName, rL) + # --- + iRow = wObj.getRowCount() + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue("DrugBank", "resource_name", iRow) + wObj.setValue(dbId, "resource_accession_code", iRow) + wObj.setValue(mappingType, "related_mapping_method", iRow) + # + # ------------ ----------------------- ----------------------- ----------------------- ----------- + ccmProvider = rP.getResource("ChemCompModelProvider instance") if rP else None + csdMapD = ccmProvider.getMapping() + # + if csdMapD and ccId in csdMapD: + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + wObj = dataContainer.getObj(catName) + logger.debug("Using CSD model mapping length %d", len(csdMapD)) + dbId = csdMapD[ccId][0]["db_code"] + rL = wObj.selectIndices("CCDC/CSD", "resource_name") + if rL: + ok = wObj.removeRows(rL) + if not ok: + logger.debug("Error removing rows in %r %r", catName, rL) + iRow = wObj.getRowCount() + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue("CCDC/CSD", "resource_name", iRow) + wObj.setValue(dbId, "resource_accession_code", iRow) + wObj.setValue("assigned by PDB", 
"related_mapping_method", iRow) + # + residProvider = rP.getResource("ResidProvider instance") if rP else None + residMapD = residProvider.getMapping() + # + if residMapD and ccId in residMapD: + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + wObj = dataContainer.getObj(catName) + rL = wObj.selectIndices("RESID", "resource_name") + if rL: + ok = wObj.removeRows(rL) + if not ok: + logger.debug("Error removing rows in %r %r", catName, rL) + logger.debug("Using RESID model mapping length %d", len(residMapD)) + for rD in residMapD[ccId]: + dbId = rD["residCode"] + iRow = wObj.getRowCount() + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue("RESID", "resource_name", iRow) + wObj.setValue(dbId, "resource_accession_code", iRow) + wObj.setValue("matching by RESID resource", "related_mapping_method", iRow) + # + pubchemProvider = rP.getResource("PubChemProvider instance") if rP else None + pubchemMapD = pubchemProvider.getIdentifiers() + if pubchemMapD and ccId in pubchemMapD: + pharosProvider = rP.getResource("PharosProvider instance") if rP else None + pharosChemblD = pharosProvider.getIdentifiers() + + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + wObj = dataContainer.getObj(catName) + for rName in ["ChEBI", "ChEMBL", "CAS", "PubChem"]: + rL = wObj.selectIndices(rName, "resource_name") + if rL: + ok = wObj.removeRows(rL) + if not ok: + logger.debug("Error removing rows in %r %r", catName, rL) + # + logger.debug("Using PubChem mapping length %d", len(pubchemMapD)) + xD = {} + for rD in pubchemMapD[ccId]: + for tName, tObj in rD.items(): + if tName == "pcId": + xD.setdefault("PubChem", set()).add(tObj) + elif tName in ["CAS", "ChEBI"]: + for tId in tObj: + xD.setdefault(tName, set()).add(tId) + elif tName in ["ChEMBL"]: + for tId in tObj: + xD.setdefault(tName, set()).add(tId) + if pharosChemblD and tId in pharosChemblD: + logger.debug("Mapping ccId %r to Pharos %r", ccId, tId) + xD.setdefault("Pharos", set()).add(tId) + + # + for rName, rIdS in xD.items(): + if rName in ["PubChem", "Pharos"]: + aMethod = "matching InChIKey in PubChem" + elif rName in ["CAS", "ChEMBL", "ChEBI"]: + aMethod = "assigned by PubChem resource" + elif rName in ["Pharos"]: + aMethod = "matching ChEMBL ID in Pharos" + for rId in rIdS: + iRow = wObj.getRowCount() + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue(rName, "resource_name", iRow) + wObj.setValue(rId, "resource_accession_code", iRow) + wObj.setValue(aMethod, "related_mapping_method", iRow) + + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def __getChemCompId(self, dataContainer): + if not dataContainer.exists("chem_comp"): + return None + ccObj = dataContainer.getObj("chem_comp") + if not ccObj.hasAttribute("pdbx_release_status"): + return None + return ccObj.getValueOrDefault("id", 0, None) + + def __getDrugBankMapping(self, dataContainer, resourceProvider): + """Return the DrugBank mapping for the chemical definition in the input dataContainer. 
+ + Args: + dataContainer (obj): instance of a DataContainer() object + resourceProvider (obj): instance of a ResourceProvider() object + + Returns: + (str, list, str, str): DrugBank accession code, list of ATC assignments, mapping type, and DrugBank version + """ + try: + dbId = None + atcL = [] + mappingType = None + + dbProvider = resourceProvider.getResource("DrugBankProvider instance") if resourceProvider else None + dbD = dbProvider.getMapping() + dbVersion = dbProvider.getVersion() + if dbD: + ccId = self.__getChemCompId(dataContainer) + # + dbMapD = dbD["id_map"] + inKeyD = dbD["inchikey_map"] + atcD = dbD["db_atc_map"] + logger.debug("DrugBank correspondence length is %d", len(dbMapD)) + logger.debug("atcD length is %d", len(atcD)) + logger.debug("inKeyD length is %d", len(inKeyD)) + # + if dataContainer.exists("rcsb_chem_comp_descriptor"): + ccIObj = dataContainer.getObj("rcsb_chem_comp_descriptor") + + if ccIObj.hasAttribute("InChIKey"): + inky = ccIObj.getValue("InChIKey", 0) + logger.debug("inKeyD length is %d testing %r", len(inKeyD), inky) + if inky in inKeyD: + logger.debug("Matching inchikey for %s", ccId) + dbId = inKeyD[inky][0]["drugbank_id"] + mappingType = "matching InChIKey in DrugBank" + # + + if not dbId and dbMapD and ccId in dbMapD: + dbId = dbMapD[ccId]["drugbank_id"] + mappingType = "assigned by DrugBank resource" + logger.debug("Matching db assignment for %s", ccId) + if atcD and dbId in atcD: + atcL = atcD[dbId] + + except Exception as e: + logger.exception("Failing with %s", str(e)) + + return dbId, atcL, mappingType, dbVersion + + def addChemCompAnnotation(self, dataContainer, catName, **kwargs): + """Generate the rcsb_chem_comp_annotation category - + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + loop_ + _rcsb_chem_comp_annotation.ordinal + _rcsb_chem_comp_annotation.entry_id + _rcsb_chem_comp_annotation.entity_id + # + _rcsb_chem_comp_annotation.annotation_id + _rcsb_chem_comp_annotation.type + _rcsb_chem_comp_annotation.name + _rcsb_chem_comp_annotation.description + # + _rcsb_chem_comp_annotation.annotation_lineage_id + _rcsb_chem_comp_annotation.annotation_lineage_name + _rcsb_chem_comp_annotation.annotation_lineage_depth + # + _rcsb_chem_comp_annotation.provenance_source + _rcsb_chem_comp_annotation.assignment_version + # ... + + loop_ + _pdbx_chem_comp_feature.comp_id + _pdbx_chem_comp_feature.type + _pdbx_chem_comp_feature.value + _pdbx_chem_comp_feature.source + _pdbx_chem_comp_feature.support + NAG 'CARBOHYDRATE ISOMER' D PDB ? + NAG 'CARBOHYDRATE RING' pyranose PDB ? + NAG 'CARBOHYDRATE ANOMER' beta PDB ?
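+ + Note that only PDB-provenance carbohydrate feature types are carried over from pdbx_chem_comp_feature; ATC (via DrugBank), RESID, and PSI-MOD annotations are appended when the corresponding resource providers are available.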
+ """ + try: + if not (dataContainer.exists("chem_comp_atom") and dataContainer.exists("chem_comp_bond")): + return False + # + logger.debug("Starting with %r %r", dataContainer.getName(), catName) + rP = kwargs.get("resourceProvider") + ccId = self.__getChemCompId(dataContainer) + # ---- + if dataContainer.exists("pdbx_chem_comp_feature"): + fObj = dataContainer.getObj("pdbx_chem_comp_feature") + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + wObj = dataContainer.getObj(catName) + # + modDate = None + if dataContainer.exists("chem_comp"): + cObj = dataContainer.getObj("chem_comp") + if cObj.hasAttribute("pdbx_modified_date"): + modDate = cObj.getValue("pdbx_modified_date", 0) + else: + logger.info("%r missing modified_date", ccId) + # + fD = {} + for ii in range(fObj.getRowCount()): + pSource = fObj.getValue("source", ii) + pCode = "PDB Reference Data" if pSource.upper() == "PDB" else None + if not pCode: + continue + fType = fObj.getValue("type", ii) + if fType.upper() not in ["CARBOHYDRATE ISOMER", "CARBOHYDRATE RING", "CARBOHYDRATE ANOMER", "CARBOHYDRATE PRIMARY CARBONYL GROUP"]: + continue + fType = fType.title() + fValue = fObj.getValue("value", ii) + if (fType, fValue, pCode) in fD: + continue + fD[(fType, fValue, pCode)] = True + # + iRow = wObj.getRowCount() + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue(fType, "type", iRow) + wObj.setValue("%s_%d" % (ccId, ii + 1), "annotation_id", iRow) + wObj.setValue(fValue, "name", iRow) + wObj.setValue(pCode, "provenance_source", iRow) + av = modDate if modDate else "1.0" + wObj.setValue(av, "assignment_version", iRow) + # + # ---- + + dbId, atcIdL, mappingType, dbVersion = self.__getDrugBankMapping(dataContainer, rP) + atcP = rP.getResource("AtcProvider instance") if rP else None + if atcIdL and atcP: + # + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # ----- + wObj = dataContainer.getObj(catName) + # + for atcId in atcIdL: + iRow = wObj.getRowCount() + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue("ATC", "type", iRow) + wObj.setValue(atcId, "annotation_id", iRow) + wObj.setValue(atcP.getAtcName(atcId), "name", iRow) + # + wObj.setValue("ATC " + mappingType, "description", iRow) + # --- + wObj.setValue(";".join(atcP.getNameLineage(atcId)), "annotation_lineage_name", iRow) + idLinL = atcP.getIdLineage(atcId) + wObj.setValue(";".join(idLinL), "annotation_lineage_id", iRow) + wObj.setValue(";".join([str(jj) for jj in range(0, len(idLinL) + 1)]), "annotation_lineage_depth", iRow) + # + wObj.setValue("DrugBank", "provenance_source", iRow) + wObj.setValue(dbVersion, "assignment_version", iRow) + logger.debug("dbId %r atcId %r lineage %r", dbId, atcId, idLinL) + # ----- + rsProvider = rP.getResource("ResidProvider instance") if rP else None + residD = rsProvider.getMapping() + # + if residD and (ccId in residD): + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + wObj = dataContainer.getObj(catName) + # ----- + residVersion = rsProvider.getVersion() + jj = 1 + for rD in residD[ccId]: + if "modRes" not in rD: + continue + for modRes in rD["modRes"]: + iRow = wObj.getRowCount() + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(iRow + 1, "ordinal", 
iRow) + wObj.setValue("Modification Type", "type", iRow) + wObj.setValue("modres_%d" % jj, "annotation_id", iRow) + wObj.setValue(modRes, "name", iRow) + wObj.setValue("RESID", "provenance_source", iRow) + wObj.setValue(residVersion, "assignment_version", iRow) + jj += 1 + # + jj = 1 + for rD in residD[ccId]: + if "genEnzymes" not in rD: + continue + for genEnzyme in rD["genEnzymes"]: + iRow = wObj.getRowCount() + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue("Generating Enzyme", "type", iRow) + wObj.setValue("enzyme_%d" % jj, "annotation_id", iRow) + wObj.setValue(genEnzyme, "name", iRow) + wObj.setValue("RESID", "provenance_source", iRow) + wObj.setValue(residVersion, "assignment_version", iRow) + jj += 1 + # + psimodP = rP.getResource("PsiModProvider instance") if rP else None + if psimodP: + jj = 1 + for rD in residD[ccId]: + if "ontRefs" not in rD: + continue + for ontId in rD["ontRefs"]: + if ontId[:3] != "MOD": + continue + iRow = wObj.getRowCount() + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue("PSI-MOD", "type", iRow) + wObj.setValue(ontId, "annotation_id", iRow) + wObj.setValue(psimodP.getName(ontId), "name", iRow) + wObj.setValue("RESID", "provenance_source", iRow) + wObj.setValue(residVersion, "assignment_version", iRow) + # + linL = psimodP.getLineage(ontId) + wObj.setValue(";".join([tup[0] for tup in linL]), "annotation_lineage_id", iRow) + wObj.setValue(";".join([tup[1] for tup in linL]), "annotation_lineage_name", iRow) + wObj.setValue(";".join([str(tup[2]) for tup in linL]), "annotation_lineage_depth", iRow) + # + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def addChemCompTargets(self, dataContainer, catName, **kwargs): + """Add category rcsb_chem_comp_target using DrugBank annotations. 
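+ Rows are generated from the DrugBank target_interactions mapping for the subject component, as illustrated in the example below.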
+ + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + Example: + loop_ + _rcsb_chem_comp_target.comp_id + _rcsb_chem_comp_target.ordinal + _rcsb_chem_comp_target.name + _rcsb_chem_comp_target.interaction_type + _rcsb_chem_comp_target.target_actions + _rcsb_chem_comp_target.organism_common_name + _rcsb_chem_comp_target.reference_database_name + _rcsb_chem_comp_target.reference_database_accession_code + _rcsb_chem_comp_target.provenance_code + ATP 1 "O-phosphoseryl-tRNA(Sec) selenium transferase" target cofactor Human UniProt Q9HD40 DrugBank + + DrugBank target info: + { + "type": "target", + "name": "Alanine--glyoxylate aminotransferase 2, mitochondrial", + "organism": "Human", + "actions": [ + "cofactor" + ], + "known_action": "unknown", + "uniprot_ids": "Q9BYV1" + }, + + """ + try: + logger.debug("Starting with %r %r", dataContainer.getName(), catName) + # Exit if source categories are missing + if not (dataContainer.exists("chem_comp_atom") and dataContainer.exists("chem_comp_bond")): + return False + + # + rP = kwargs.get("resourceProvider") + dbProvider = rP.getResource("DrugBankProvider instance") if rP else None + dbD = dbProvider.getMapping() + if not dbD: + return False + + dbMapD = dbD["id_map"] if "id_map" in dbD else None + # + ccId = dataContainer.getName() + if dbMapD and ccId in dbMapD and "target_interactions" in dbMapD[ccId]: + # + # Create the new target category + if not dataContainer.exists(catName): + dataContainer.append( + DataCategory( + catName, + attributeNameList=[ + "comp_id", + "ordinal", + "name", + "interaction_type", + "target_actions", + "organism_common_name", + "reference_database_name", + "reference_database_accession_code", + "provenance_code", + ], + ) + ) + wObj = dataContainer.getObj(catName) + logger.debug("Using DrugBank mapping length %d", len(dbMapD)) + rL = wObj.selectIndices("DrugBank", "provenance_code") + if rL: + ok = wObj.removeRows(rL) + if not ok: + logger.debug("Error removing rows in %r %r", catName, rL) + # + iRow = wObj.getRowCount() + for tD in dbMapD[ccId]["target_interactions"]: + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue(tD["name"], "name", iRow) + wObj.setValue(tD["type"], "interaction_type", iRow) + if "actions" in tD and tD["actions"]: + wObj.setValue(";".join(tD["actions"]), "target_actions", iRow) + if "organism" in tD: + wObj.setValue(tD["organism"], "organism_common_name", iRow) + if "uniprot_ids" in tD: + wObj.setValue("UniProt", "reference_database_name", iRow) + wObj.setValue(tD["uniprot_ids"], "reference_database_accession_code", iRow) + wObj.setValue("DrugBank", "provenance_code", iRow) + iRow += 1 + + # + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def __getAuditDates(self, dataContainer, catName): + createDate = None + releaseDate = None + reviseDate = None + try: + if dataContainer.exists(catName): + cObj = dataContainer.getObj(catName) + for iRow in range(cObj.getRowCount()): + aType = cObj.getValueOrDefault("action_type", iRow, defaultValue=None) + dateVal = cObj.getValueOrDefault("date", iRow, defaultValue=None) + if aType in ["Create component"]: + createDate = dateVal + elif aType in ["Initial release"]: + releaseDate = dateVal + reviseDate = cObj.getValueOrDefault("date", cObj.getRowCount() - 1, defaultValue=None) + except Exception as e: +
logger.exception("Faling with %s", str(e)) + return createDate, releaseDate, reviseDate + + def addChemCompInfo(self, dataContainer, catName, **kwargs): + """Add category rcsb_chem_comp_info and rcsb_chem_comp_container_identifiers. + + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For example, + _rcsb_chem_comp_info.comp_id BNZ + _rcsb_chem_comp_info.atom_count 12 + _rcsb_chem_comp_info.atom_count_chiral 0 + _rcsb_chem_comp_info.bond_count 12 + _rcsb_chem_comp_info.bond_count_aromatic 6 + _rcsb_chem_comp_info.atom_count_heavy 6 + """ + try: + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + # Exit if source categories are missing + if not dataContainer.exists("chem_comp"): + return False + ccObj = dataContainer.getObj("chem_comp") + if not ccObj.hasAttribute("pdbx_release_status"): + return False + ccId = ccObj.getValue("id", 0) + ccReleaseStatus = ccObj.getValue("pdbx_release_status", 0) + subComponentIds = ccObj.getValueOrDefault("pdbx_subcomponent_list", 0, defaultValue=None) + # + # + prdId = prdReleaseStatus = representAs = None + if dataContainer.exists("pdbx_reference_molecule"): + prdObj = dataContainer.getObj("pdbx_reference_molecule") + prdId = prdObj.getValueOrDefault("prd_id", 0, defaultValue=None) + prdReleaseStatus = prdObj.getValueOrDefault("release_status", 0, defaultValue=None) + representAs = prdObj.getValueOrDefault("represent_as", 0, defaultValue=None) + # + # ------- add the canonical identifiers -------- + cN = "rcsb_chem_comp_container_identifiers" + if not dataContainer.exists(cN): + dataContainer.append(DataCategory(cN, attributeNameList=self.__dApi.getAttributeNameList(cN))) + idObj = dataContainer.getObj(cN) + idObj.setValue(ccId, "comp_id", 0) + if prdId: + idObj.setValue(prdId, "prd_id", 0) + idObj.setValue(ccId, "rcsb_id", 0) + if subComponentIds: + tL = [tV.strip() for tV in subComponentIds.split()] + idObj.setValue(",".join(tL), "subcomponent_ids", 0) + # + # Get audit info - + if representAs and representAs.lower() in ["polymer"]: + _, releaseDate, revisionDate = self.__getAuditDates(dataContainer, "pdbx_prd_audit") + else: + _, releaseDate, revisionDate = self.__getAuditDates(dataContainer, "pdbx_chem_comp_audit") + # + # --------- --------- --------- --------- + # Create the new target category + # + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + # ------- + wObj = dataContainer.getObj(catName) + # + numAtoms = 0 + numAtomsHeavy = 0 + numAtomsChiral = 0 + try: + cObj = dataContainer.getObj("chem_comp_atom") + numAtoms = cObj.getRowCount() + numAtomsHeavy = 0 + numAtomsChiral = 0 + for ii in range(numAtoms): + el = cObj.getValue("type_symbol", ii) + if el != "H": + numAtomsHeavy += 1 + chFlag = cObj.getValue("pdbx_stereo_config", ii) + if chFlag != "N": + numAtomsChiral += 1 + except Exception: + logger.warning("Missing chem_comp_atom category for %s", ccId) + numAtoms = 0 + numAtomsHeavy = 0 + numAtomsChiral = 0 + # + wObj.setValue(ccId, "comp_id", 0) + if prdReleaseStatus: + wObj.setValue(prdReleaseStatus, "release_status", 0) + else: + wObj.setValue(ccReleaseStatus, "release_status", 0) + # + wObj.setValue(releaseDate, "initial_release_date", 0) + wObj.setValue(revisionDate, "revision_date", 0) + # + wObj.setValue(numAtoms, "atom_count", 0) + wObj.setValue(numAtomsChiral, "atom_count_chiral", 0) + 
wObj.setValue(numAtomsHeavy, "atom_count_heavy", 0) + # + # ------ + numBonds = 0 + numBondsAro = 0 + try: + cObj = dataContainer.getObj("chem_comp_bond") + numBonds = cObj.getRowCount() + numBondsAro = 0 + for ii in range(numBonds): + aroFlag = cObj.getValue("pdbx_aromatic_flag", ii) + if aroFlag != "N": + numBondsAro += 1 + except Exception: + pass + # + wObj.setValue(numBonds, "bond_count", 0) + wObj.setValue(numBondsAro, "bond_count_aromatic", 0) + # + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def addChemCompDescriptor(self, dataContainer, catName, **kwargs): + """Add category rcsb_chem_comp_descriptor. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For example, parse the pdbx_chem_comp_descriptor category and extract SMILES/CACTVS and InChI descriptors - + + loop_ + _pdbx_chem_comp_descriptor.comp_id + _pdbx_chem_comp_descriptor.type + _pdbx_chem_comp_descriptor.program + _pdbx_chem_comp_descriptor.program_version + _pdbx_chem_comp_descriptor.descriptor + ATP SMILES ACDLabs 10.04 "O=P(O)(O)OP(=O)(O)OP(=O)(O)OCC3OC(n2cnc1c(ncnc12)N)C(O)C3O" + ATP SMILES_CANONICAL CACTVS 3.341 "Nc1ncnc2n(cnc12)[C@@H]3O[C@H](CO[P@](O)(=O)O[P@@](O)(=O)O[P](O)(O)=O)[C@@H](O)[C@H]3O" + ATP SMILES CACTVS 3.341 "Nc1ncnc2n(cnc12)[CH]3O[CH](CO[P](O)(=O)O[P](O)(=O)O[P](O)(O)=O)[CH](O)[CH]3O" + ATP SMILES_CANONICAL "OpenEye OEToolkits" 1.5.0 "c1nc(c2c(n1)n(cn2)[C@H]3[C@@H]([C@@H]([C@H](O3)CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)O)O)N" + ATP SMILES "OpenEye OEToolkits" 1.5.0 "c1nc(c2c(n1)n(cn2)C3C(C(C(O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N" + ATP InChI InChI 1.03 "InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3- ...." + ATP InChIKey InChI 1.03 ZKHQWZAMYRWXGA-KQYNXXCUSA-N + + To produce - + _rcsb_chem_comp_descriptor.comp_id ATP + _rcsb_chem_comp_descriptor.SMILES 'Nc1ncnc2n(cnc12)[CH]3O[CH](CO[P](O)(=O)O[P](O)(=O)O[P](O)(O)=O)[CH](O)[CH]3O' + _rcsb_chem_comp_descriptor.SMILES_stereo 'Nc1ncnc2n(cnc12)[C@@H]3O[C@H](CO[P@](O)(=O)O[P@@](O)(=O)O[P](O)(O)=O)[C@@H](O)[C@H]3O' + _rcsb_chem_comp_descriptor.InChI 'InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25 ...'
+ _rcsb_chem_comp_descriptor.InChIKey 'ZKHQWZAMYRWXGA-KQYNXXCUSA-N' + """ + try: + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + # Exit if source categories are missing + if not (dataContainer.exists("chem_comp") and dataContainer.exists("pdbx_chem_comp_descriptor")): + return False + # + # Create the new target category + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=["comp_id", "SMILES", "SMILES_stereo", "InChI", "InChIKey"])) + # + wObj = dataContainer.getObj(catName) + ccIObj = dataContainer.getObj("pdbx_chem_comp_descriptor") + iRow = 0 + ccId = "" + for ii in range(ccIObj.getRowCount()): + ccId = ccIObj.getValue("comp_id", ii) + nm = ccIObj.getValue("descriptor", ii) + prog = ccIObj.getValue("program", ii) + typ = ccIObj.getValue("type", ii) + # + if typ == "SMILES_CANONICAL" and prog.upper().startswith("OPENEYE"): + wObj.setValue(nm, "SMILES_stereo", iRow) + elif typ == "SMILES" and prog.upper().startswith("OPENEYE"): + wObj.setValue(nm, "SMILES", iRow) + elif typ == "InChI" and prog == "InChI": + wObj.setValue(nm, "InChI", iRow) + elif typ == "InChIKey" and prog == "InChI": + wObj.setValue(nm, "InChIKey", iRow) + # + wObj.setValue(ccId, "comp_id", iRow) + # + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def renameCitationCategory(self, dataContainer, catName, **kwargs): + """Rename citation and citation author categories. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + """ + try: + _ = kwargs + logger.debug("Starting with %r %r", dataContainer.getName(), catName) + if not (dataContainer.exists("chem_comp") and dataContainer.exists("pdbx_chem_comp_identifier")): + return False + # + # Rename target categories + if dataContainer.exists("citation"): + dataContainer.rename("citation", "rcsb_bird_citation") + if dataContainer.exists("citation_author"): + dataContainer.rename("citation_author", "rcsb_bird_citation_author") + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + + return False + + def addChemCompSynonyms(self, dataContainer, catName, **kwargs): + """Add category rcsb_chem_comp_synonyms including PDB and DrugBank annotations.
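+ Synonyms are aggregated from the chem_comp preferred name, pdbx_chem_comp_synonyms, pdbx_chem_comp_identifier, and DrugBank aliases and brand names.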
+ + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For example, + + loop_ + _rcsb_chem_comp_synonyms.comp_id + _rcsb_chem_comp_synonyms.ordinal + _rcsb_chem_comp_synonyms.name + _rcsb_chem_comp_synonyms.provenance_code + _rcsb_chem_comp_synonyms.type + + ATP 1 "adenosine 5'-(tetrahydrogen triphosphate)" 'PDB Reference Data' 'Preferred Name' + ATP 2 "Adenosine 5'-triphosphate" 'PDB Reference Data' 'Preferred Common Name' + ATP 3 Atriphos DrugBank 'Synonym' + ATP 4 Striadyne DrugBank 'Synonym' + + """ + try: + logger.debug("Starting with %r %r", dataContainer.getName(), catName) + if not (dataContainer.exists("chem_comp") and dataContainer.exists("chem_comp_atom") and dataContainer.exists("pdbx_chem_comp_identifier")): + return False + # + # + # Create the new target category + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + else: + # remove the rowlist - + pass + # + tTupL = self.__dApi.getEnumListWithDetail(catName, "type") + typeLookupD = {tTup[0].upper(): tTup[0] for tTup in tTupL} + + pTupL = self.__dApi.getEnumListWithDetail(catName, "provenance_source") + provLookupD = {pTup[0].upper(): pTup[0] for pTup in pTupL} + + provLookupD["ACD-LABS"] = "ACDLabs" + provLookupD["PDB"] = "PDB Reference Data" + + wObj = dataContainer.getObj(catName) + # + # Get all of the relevant names from the definition - + # + iRow = 0 + nmD = {} + provCode = "PDB Reference Data" + ccObj = dataContainer.getObj("chem_comp") + ccId = ccObj.getValue("id", 0) + ccName = ccObj.getValue("name", 0) + # ccSynonymL = [] + # if ccObj.hasAttribute("pdbx_synonyms"): + # ccSynonymL = str(ccObj.getValue("pdbx_synonyms", 0)).split(";") + # + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(ccName, "name", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue(provCode, "provenance_source", iRow) + wObj.setValue("Preferred Name", "type", iRow) + iRow += 1 + nmD[ccName] = True + # + if dataContainer.exists("pdbx_chem_comp_synonyms"): + qObj = dataContainer.getObj("pdbx_chem_comp_synonyms") + for ii in range(qObj.getRowCount()): + sType = qObj.getValue("type", ii) + pCode = provCode + pType = "Preferred Synonym" if sType.upper() == "PREFERRED" else "Synonym" + nm = str(qObj.getValue("name", ii)).strip() + if nm in nmD: + continue + nmD[nm] = True + logger.debug("Synonym %r sType %r pCode %r", nm, sType, pCode) + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(nm, "name", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue(pCode, "provenance_source", iRow) + wObj.setValue(pType, "type", iRow) + iRow += 1 + else: + logger.debug("No synonyms for %s", ccId) + + # for nm in ccSynonymL: + # if nm in ["?", "."]: + # continue + # if nm in nmD: + # continue + # nmD[nm] = True + # wObj.setValue(ccId, "comp_id", iRow) + # wObj.setValue(nm, "name", iRow) + # wObj.setValue(iRow + 1, "ordinal", iRow) + # wObj.setValue(provCode, "provenance_source", iRow) + # wObj.setValue("Synonym", "type", iRow) + # iRow += 1 + # + ccIObj = dataContainer.getObj("pdbx_chem_comp_identifier") + for ii in range(ccIObj.getRowCount()): + nm = str(ccIObj.getValue("identifier", ii)).strip() + prog = ccIObj.getValue("program", ii) + iType = ccIObj.getValue("type", ii) + if not iType or iType.upper() not in typeLookupD: + continue + if prog and prog.upper() in provLookupD: + sProg =
provLookupD[prog.upper()] + sType = typeLookupD[iType.upper()] + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(nm, "name", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue(sProg, "provenance_source", iRow) + wObj.setValue(sType, "type", iRow) + + iRow += 1 + else: + logger.error("%s unknown provenance %r", ccId, prog) + # + rP = kwargs.get("resourceProvider") + dbProvider = rP.getResource("DrugBankProvider instance") if rP else None + dbD = dbProvider.getMapping() + if dbD: + dbMapD = dbD["id_map"] + # + if dbMapD and ccId in dbMapD: + if "aliases" in dbMapD[ccId]: + iRow = wObj.getRowCount() + for nm in dbMapD[ccId]["aliases"]: + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(str(nm).strip(), "name", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue("DrugBank", "provenance_source", iRow) + wObj.setValue("Synonym", "type", iRow) + iRow += 1 + if "brand_names" in dbMapD[ccId]: + iRow = wObj.getRowCount() + for nm in dbMapD[ccId]["brand_names"]: + wObj.setValue(ccId, "comp_id", iRow) + wObj.setValue(str(nm).strip(), "name", iRow) + wObj.setValue(iRow + 1, "ordinal", iRow) + wObj.setValue("DrugBank", "provenance_source", iRow) + wObj.setValue("Brand Name", "type", iRow) + iRow += 1 + + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + + return False diff --git a/rcsb/utils/dictionary/DictMethodCommonUtils.py b/rcsb/utils/dictionary/DictMethodCommonUtils.py new file mode 100644 index 0000000..0a33d69 --- /dev/null +++ b/rcsb/utils/dictionary/DictMethodCommonUtils.py @@ -0,0 +1,3994 @@ +## +# File: DictMethodCommonUtils.py +# Author: J. Westbrook +# Date: 16-Jul-2019 +# Version: 0.001 Initial version +# +# Updates: +# 26-Jul-2019 jdw Include struct_mon_prot_cis with secondary structure features +# Add general processing of intermolecular and other connections. +# 19-Sep-2019 jdw Add method getEntityReferenceAlignments() +# 13-Oct-2019 jdw add isoform support +## +""" +Helper class implements common utility external method references supporting the RCSB dictionary extension. 
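+ +Results of the more expensive per-entry extractions are memoized in small bounded caches (CacheUtils) keyed by container name, so repeated getter calls against the same dataContainer are inexpensive.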
+ +""" +__docformat__ = "restructuredtext en" +__author__ = "John Westbrook" +__email__ = "jwest@rcsb.rutgers.edu" +__license__ = "Apache 2.0" + +# pylint: disable=too-many-lines + +import datetime +import itertools +import logging +import re +import sys +from collections import OrderedDict, namedtuple + +from rcsb.utils.io.CacheUtils import CacheUtils +from rcsb.utils.seq.SeqAlign import SeqAlign + +logger = logging.getLogger(__name__) + +OutlierValueFields = ("compId", "seqId", "outlierType", "description", "reported", "reference", "uncertaintyValue", "uncertaintyType") +OutlierValue = namedtuple("OutlierValue", OutlierValueFields, defaults=(None,) * len(OutlierValueFields)) + +BoundEntityFields = ("targetCompId", "connectType", "partnerCompId", "partnerEntityId", "partnerEntityType") +NonpolymerBoundEntity = namedtuple("NonpolymerBoundEntity", BoundEntityFields, defaults=(None,) * len(BoundEntityFields)) + +BoundInstanceFields = ("targetCompId", "connectType", "partnerCompId", "partnerAsymId", "partnerEntityType", "partnerSeqId", "bondDistance", "bondOrder") +NonpolymerBoundInstance = namedtuple("NonpolymerBoundInstance", BoundInstanceFields, defaults=(None,) * len(BoundInstanceFields)) + +NonpolymerValidationFields = ("rsr", "rscc", "mogul_bonds_rmsz", "mogul_angles_rmsz", "missing_heavy_atom_count") +NonpolymerValidationInstance = namedtuple("NonpolymerValidationInstance", NonpolymerValidationFields, defaults=(None,) * len(NonpolymerValidationFields)) + + +class DictMethodCommonUtils(object): + """Helper class implements common utility external method references supporting the RCSB dictionary extension.""" + + # Dictionary of current standard monomers - + aaDict3 = { + "ALA": "A", + "ARG": "R", + "ASN": "N", + "ASP": "D", + "ASX": "B", + "CYS": "C", + "GLN": "Q", + "GLU": "E", + "GLX": "Z", + "GLY": "G", + "HIS": "H", + "ILE": "I", + "LEU": "L", + "LYS": "K", + "MET": "M", + "PHE": "F", + "PRO": "P", + "SER": "S", + "THR": "T", + "TRP": "W", + "TYR": "Y", + "VAL": "V", + "PYL": "O", + "SEC": "U", + } + dnaDict3 = {"DA": "A", "DC": "C", "DG": "G", "DT": "T", "DU": "U", "DI": "I"} + rnaDict3 = {"A": "A", "C": "C", "G": "G", "I": "I", "N": "N", "T": "T", "U": "U"} + # "UNK": "X", + # "MSE":"M", + # ".": "." 
+ monDict3 = {**aaDict3, **dnaDict3, **rnaDict3} + + def __init__(self, **kwargs): + """ + Args: + **kwargs: (dict) Placeholder for future key-value arguments + + """ + # + self._raiseExceptions = kwargs.get("raiseExceptions", False) + self.__wsPattern = re.compile(r"\s+", flags=re.UNICODE | re.MULTILINE) + self.__reNonDigit = re.compile(r"[^\d]+") + # + cacheSize = 5 + self.__entityAndInstanceMapCache = CacheUtils(size=cacheSize, label="instance mapping") + self.__atomInfoCache = CacheUtils(size=cacheSize, label="atom site counts and mapping") + self.__protSSCache = CacheUtils(size=cacheSize, label="protein secondary structure") + self.__instanceConnectionCache = CacheUtils(size=cacheSize, label="instance connections") + self.__entityReferenceSequenceDetailsCache = CacheUtils(size=cacheSize, label="entity reference sequence details") + self.__entitySequenceFeatureCache = CacheUtils(size=cacheSize, label="entity sequence features") + self.__instanceSiteInfoCache = CacheUtils(size=cacheSize, label="instance site details") + self.__instanceUnobservedCache = CacheUtils(size=cacheSize, label="instance unobserved details") + self.__modelOutliersCache = CacheUtils(size=cacheSize, label="model outlier details") + # + logger.debug("Dictionary common utilities init") + + def echo(self, msg): + logger.info(msg) + + def testCache(self): + return True + + def isFloat(self, val): + try: + float(val) + except Exception: + return False + return True + + def __fetchEntityAndInstanceTypes(self, dataContainer): + wD = self.__entityAndInstanceMapCache.get(dataContainer.getName()) + if not wD: + wD = self.__getEntityAndInstanceTypes(dataContainer) + self.__entityAndInstanceMapCache.set(dataContainer.getName(), wD) + return wD + + def getFormulaWeightNonSolvent(self, dataContainer): + """Return the formula weight of the non-solvent entities in the deposited entry. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + float: formula weight (kilodaltons) + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["fwNonSolvent"] if "fwNonSolvent" in wD else {} + + def getInstancePolymerTypes(self, dataContainer): + """Return a dictionary of polymer types for each polymer instance. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'asymId': <filtered polymer type>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["instancePolymerTypeD"] if "instancePolymerTypeD" in wD else {} + + def getInstanceTypes(self, dataContainer): + """Return a dictionary of entity types for each entity instance. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'asymId': <entity type>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["instanceTypeD"] if "instanceTypeD" in wD else {} + + def getInstanceTypeCounts(self, dataContainer): + """Return a dictionary of instance counts for each entity type.
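+ + For example, an entry with two protein chains, one bound ligand, and ordered waters might return {'polymer': 2, 'non-polymer': 1, 'water': 1} (illustrative values).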
+ + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'entity type': <# of instances>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["instanceTypeCountD"] if "instanceTypeCountD" in wD else {} + + def getInstanceEntityMap(self, dataContainer): + """Return a dictionary of entities corresponding to each entity instance. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'asymId': <entity id>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["instEntityD"] if "instEntityD" in wD else {} + + def getEntityPolymerTypes(self, dataContainer): + """Return a dictionary of polymer types for each polymer entity. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'entityId': <dictionary polymer type>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["epTypeD"] if "epTypeD" in wD else {} + + def getEntityTypes(self, dataContainer): + """Return a dictionary of entity types for each entity. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'entityId': <dictionary entity type>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["eTypeD"] if "eTypeD" in wD else {} + + def getPolymerEntityFilteredTypes(self, dataContainer): + """Return a dictionary of filtered entity polymer types for each polymer entity. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'entityId': <filtered polymer type>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["epTypeFilteredD"] if "epTypeFilteredD" in wD else {} + + def getPolymerEntityLengths(self, dataContainer): + """Return a dictionary of entity polymer lengths (from the one-letter-code sequence) for each polymer entity. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'entityId': <monomer length>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["epLengthD"] if "epLengthD" in wD else {} + + def getPolymerEntityLengthsEnumerated(self, dataContainer): + """Return a dictionary of entity polymer lengths (from the enumerated sequence) for each polymer entity. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'entityId': <monomer length>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["entityPolymerLengthD"] if "entityPolymerLengthD" in wD else {} + + def getPolymerEntityMonomerCounts(self, dataContainer): + """Return a dictionary of monomer counts for each polymer entity. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'entityId': {'compId': <count>, ...
}} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["entityPolymerMonomerCountD"] if "entityPolymerMonomerCountD" in wD else {} + + def getPolymerEntityModifiedMonomers(self, dataContainer): + """Return a dictionary of nonstandard monomers for each polymer entity. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'entityId': [mod_comp_id, mod_comp_id,...]} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["entityPolymerModifiedMonomers"] if "entityPolymerModifiedMonomers" in wD else {} + + def getPolymerModifiedMonomerFeatures(self, dataContainer): + """Return a dictionary of nonstandard monomer features. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: [(entityId, seqId, compId, 'modified_monomer')] = set(compId) + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["seqModMonomerFeatureD"] if "seqModMonomerFeatureD" in wD else {} + + def getEntityPolymerLengthBounds(self, dataContainer): + """Return the minimum and maximum polymer monomer lengths over all polymer entities. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + tuple: (minLen, maxLen) + """ + if not dataContainer or not dataContainer.getName(): + return () + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["entityPolymerLengthBounds"] if "entityPolymerLengthBounds" in wD else () + + def getEntityFormulaWeightBounds(self, dataContainer): + """Return a dictionary of formula weight bounds by entity type. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: [entityType] = (minFw, maxFw) + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["fwTypeBoundD"] if "fwTypeBoundD" in wD else {} + + def getTargetComponents(self, dataContainer): + """Return the list of target components (ligands of interest). + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + list: [compId, compId,...] + """ + if not dataContainer or not dataContainer.getName(): + return [] + wD = self.__fetchEntityAndInstanceTypes(dataContainer) + return wD["ccTargets"] if "ccTargets" in wD else [] + + def __getEntityAndInstanceTypes(self, dataContainer): + """Internal method to collect and return entity/instance type, size and mapping information. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + (dict) : Return dictionary of entity types, type counts and polymer type (where applicable) for + each instance in the deposited unit.
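+ + All of the maps below are computed together and cached as a unit (see __fetchEntityAndInstanceTypes()).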
+ + Type and count contents: + + instanceTypeD[asymId] = <entity type> + instanceTypeCountD[<entity type>] = <# of instances> + instancePolymerTypeD[asymId] = <filtered polymer type> + eTypeD[entityId] = <dictionary entity type> + instEntityD[asymId] = entityId + epTypeD[entityId] = <dictionary polymer type> + epTypeFilteredD[entityId] = <filtered polymer type> + epLengthD[entityId] = polymer monomer length (from one-letter-code) + entityPolymerLengthD[entityId] = polymer monomer length (from enumerated sequence) + entityPolymerMonomerCountD[entityId][compId] = monomer count + entityPolymerModifiedMonomers[entity]=[mod compId, mod compId] + seqModMonomerFeatureD[(entityId, seqId, compId, 'modified_monomer')] = set(compId) + fwNonSolvent = float value (kilodaltons) + fwTypeBoundD[entityType] = (minFw, maxFw) + entityPolymerLengthBounds = (minL, maxL) + ccTargets = [compId, compId] + """ + rD = {} + # + try: + # + if not dataContainer.exists("entity") or not dataContainer.exists("struct_asym"): + return {} + eFwD = {} + instanceTypeD = {} + instancePolymerTypeD = {} + instanceTypeCountD = {} + # + eObj = dataContainer.getObj("entity") + eTypeD = {} + for ii in range(eObj.getRowCount()): + # logger.info("Attribute %r %r" % (ii, eObj.getAttributeList())) + entityId = eObj.getValue("id", ii) + eType = eObj.getValue("type", ii) + eTypeD[entityId] = eType + fw = eObj.getValue("formula_weight", ii) + eFwD[entityId] = float(fw) if fw and fw not in [".", "?"] else 0.0 + # + epTypeD = {} + epLengthD = {} + epTypeFilteredD = {} + hasEntityPoly = False + if dataContainer.exists("entity_poly"): + hasEntityPoly = True + epObj = dataContainer.getObj("entity_poly") + for ii in range(epObj.getRowCount()): + entityId = epObj.getValue("entity_id", ii) + pType = epObj.getValue("type", ii) + epTypeFilteredD[entityId] = self.filterEntityPolyType(pType) + epTypeD[entityId] = pType + if epObj.hasAttribute("pdbx_seq_one_letter_code_can"): + sampleSeq = self.__stripWhiteSpace(epObj.getValue("pdbx_seq_one_letter_code_can", ii)) + epLengthD[entityId] = len(sampleSeq) if sampleSeq and sampleSeq not in ["?", "."] else None + + # + seqModMonomerFeatureD = {} + entityPolymerMonomerCountD = {} + entityPolymerLengthD = {} + hasEntityPolySeq = False + epsObj = None + if dataContainer.exists("entity_poly_seq"): + epsObj = dataContainer.getObj("entity_poly_seq") + hasEntityPolySeq = True + tSeqD = {} + for ii in range(epsObj.getRowCount()): + entityId = epsObj.getValue("entity_id", ii) + seqNum = epsObj.getValue("num", ii) + compId = epsObj.getValue("mon_id", ii) + if compId not in DictMethodCommonUtils.monDict3: + seqModMonomerFeatureD.setdefault((entityId, seqNum, compId, "modified_monomer"), set()).add(compId) + # handle heterogeneity with the entityId,seqNum tuple + tSeqD.setdefault(entityId, set()).add((entityId, seqNum)) + if entityId not in entityPolymerMonomerCountD: + entityPolymerMonomerCountD[entityId] = {} + entityPolymerMonomerCountD[entityId][compId] = entityPolymerMonomerCountD[entityId][compId] + 1 if compId in entityPolymerMonomerCountD[entityId] else 1 + # + entityPolymerLengthD = {entityId: len(tSet) for entityId, tSet in tSeqD.items()} + # + if not hasEntityPoly and hasEntityPolySeq: + for entityId, eType in eTypeD.items(): + if eType in ["polymer"]: + monomerL = epsObj.selectValuesWhere("mon_id", entityId, "entity_id") + pType, fpType = self.guessEntityPolyTypes(monomerL) + epTypeFilteredD[entityId] = fpType + epTypeD[entityId] = pType + epLengthD[entityId] = len(monomerL) + + entityPolymerModifiedMonomers = {} + for entityId, cD in entityPolymerMonomerCountD.items(): + tL = [] + for compId, _ in cD.items(): + modFlag = "N" if compId in
DictMethodCommonUtils.monDict3 else "Y" + if modFlag == "Y": + tL.append(compId) + entityPolymerModifiedMonomers[entityId] = sorted(set(tL)) + # + logger.debug("%s entityPolymerModifiedMonomers %r", dataContainer.getName(), entityPolymerModifiedMonomers) + # Add branched here + # + instEntityD = {} + sObj = dataContainer.getObj("struct_asym") + for ii in range(sObj.getRowCount()): + entityId = sObj.getValue("entity_id", ii) + asymId = sObj.getValue("id", ii) + instEntityD[asymId] = entityId + if entityId in eTypeD: + instanceTypeD[asymId] = eTypeD[entityId] + else: + logger.warning("Missing entity id entry %r asymId %r entityId %r", dataContainer.getName(), asymId, entityId) + if entityId in epTypeD: + instancePolymerTypeD[asymId] = epTypeFilteredD[entityId] + # + # + # Count the instance by type - initialize all types + # + instanceTypeCountD = {k: 0 for k in ["polymer", "non-polymer", "branched", "macrolide", "water"]} + for asymId, eType in instanceTypeD.items(): + instanceTypeCountD[eType] += 1 + # + # Compute the total weight of polymer and non-polymer instances (full entities) - (kilodaltons) + # + fwNonSolvent = 0.0 + for asymId, eType in instanceTypeD.items(): + if eType not in ["water"]: + entityId = instEntityD[asymId] + fwNonSolvent += eFwD[entityId] + fwNonSolvent = fwNonSolvent / 1000.0 + # + # Get ligand of interest. + # + ccTargets = [] + if dataContainer.exists("pdbx_entity_instance_feature"): + ifObj = dataContainer.getObj("pdbx_entity_instance_feature") + for ii in range(ifObj.getRowCount()): + compId = ifObj.getValue("comp_id", ii) + ft = ifObj.getValue("feature_type", ii) + if ft.upper() in ["SUBJECT OF INVESTIGATION"]: + ccTargets.append(compId) + # + # + fwTypeBoundD = {} + tBoundD = {et: {"min": float("inf"), "max": -1.0} for eId, et in eTypeD.items()} + for entityId, fw in eFwD.items(): + fw = fw / 1000.0 + eType = eTypeD[entityId] + tBoundD[eType]["min"] = fw if fw < tBoundD[eType]["min"] else tBoundD[eType]["min"] + tBoundD[eType]["max"] = fw if fw > tBoundD[eType]["max"] else tBoundD[eType]["max"] + for eType in tBoundD: + if tBoundD[eType]["min"] > 0.00000001: + fwTypeBoundD[eType] = tBoundD[eType] + # + + entityPolymerLengthBounds = None + maxL = -1 + minL = sys.maxsize + if epLengthD: + for entityId, pLen in epLengthD.items(): + # skip entities with undetermined length (None) + if pLen is None: + continue + minL = pLen if pLen < minL else minL + maxL = pLen if pLen > maxL else maxL + entityPolymerLengthBounds = (minL, maxL) + # + + rD = { + "instanceTypeD": instanceTypeD, + "instancePolymerTypeD": instancePolymerTypeD, + "instanceTypeCountD": instanceTypeCountD, + "instEntityD": instEntityD, + "eTypeD": eTypeD, + "epLengthD": epLengthD, + "epTypeD": epTypeD, + "epTypeFilteredD": epTypeFilteredD, + "entityPolymerMonomerCountD": entityPolymerMonomerCountD, + "entityPolymerLengthD": entityPolymerLengthD, + "entityPolymerModifiedMonomers": entityPolymerModifiedMonomers, + "seqModMonomerFeatureD": seqModMonomerFeatureD, + "fwNonSolvent": fwNonSolvent, + "fwTypeBoundD": fwTypeBoundD, + "entityPolymerLengthBounds": entityPolymerLengthBounds, + "ccTargets": ccTargets, + } + logger.debug("%s length struct_asym %d (%d) instanceTypeD %r", dataContainer.getName(), sObj.getRowCount(), len(instanceTypeD), instanceTypeD) + # + except Exception as e: + logger.exception("Failing %r with %r", dataContainer.getName(), str(e)) + # + return rD + + def getAsymAuthIdMap(self, dataContainer): + """Return a dictionary mapping asymId to authAsymId.
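+ + For example: {'A': 'A', 'B': 'A', 'C': 'B'} (illustrative values only).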
+ + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {'asymId': authAsymId, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer) + return wD["asymAuthIdD"] if "asymAuthIdD" in wD else {} + + def getInstanceHeavyAtomCounts(self, dataContainer, modelId="1"): + """Return a dictionary of deposited heavy atom counts for each entity instance. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + modelId (str, optional): model index. Defaults to "1". + + + Returns: + dict: {'asymId': <# of deposited atoms>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer, modelId=modelId) + return wD["instanceHeavyAtomCountD"] if "instanceHeavyAtomCountD" in wD else {} + + def getInstanceHydrogenAtomCounts(self, dataContainer, modelId="1"): + """Return a dictionary of deposited hydrogen atom counts for each entity instance. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + modelId (str, optional): model index. Defaults to "1". + + + Returns: + dict: {'asymId': <# of deposited atoms>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer, modelId=modelId) + return wD["instanceHydrogenAtomCountD"] if "instanceHydrogenAtomCountD" in wD else {} + + def getModelIdList(self, dataContainer): + """Return a list of model identifiers for the entry. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + list: [1,2,3] + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer) + return wD["modelIdList"] if "modelIdList" in wD else [] + + def getEntityTypeHeavyAtomCounts(self, dataContainer, modelId="1"): + """Return a dictionary of deposited heavy atom counts for each entity type. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + modelId (str, optional): model index. Defaults to "1". + + Returns: + dict: {'entity type': <# of deposited atoms>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer, modelId=modelId) + return wD["typeHeavyAtomCountD"] if "typeHeavyAtomCountD" in wD else {} + + def getInstanceModeledMonomerCounts(self, dataContainer, modelId="1"): + """Return a dictionary of deposited modeled monomer counts for each entity instance. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + modelId (str, optional): model index. Defaults to "1". + + Returns: + dict: {'asymId': <# of deposited modeled monomers>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer, modelId=modelId) + return wD["instancePolymerModeledMonomerCountD"] if "instancePolymerModeledMonomerCountD" in wD else {} + + def getInstanceUnModeledMonomerCounts(self, dataContainer, modelId="1"): + """Return a dictionary of deposited unmodeled monomer counts for each entity instance. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + modelId (str, optional): model index. Defaults to "1".
+ + Returns: + dict: {'asymId': <# of deposited unmodeled mononmers>, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer, modelId=modelId) + return wD["instancePolymerUnmodeledMonomerCountD"] if "instancePolymerUnmodeledMonomerCountD" in wD else {} + + def getDepositedMonomerCounts(self, dataContainer, modelId="1"): + """Return deposited modeled and unmodeled polymer monomer counts for the input modelid. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + modelId (str, optional): model index. Defaults to "1". + + + Returns: + (int,int): modeled and unmodeled monomer counts + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer, modelId=modelId) + modeledCount = sum(wD["instancePolymerModeledMonomerCountD"].values()) + unModeledCount = sum(wD["instancePolymerUnmodeledMonomerCountD"].values()) + return modeledCount, unModeledCount + + def getDepositedAtomCounts(self, dataContainer, modelId="1"): + """Return the number of deposited heavy atoms in the input model, the total deposited atom + and the total model count. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + modelId (str, optional): model index. Defaults to "1". + + Returns: + (int, int, int, int) deposited heavy atoms in input model, hydrogen atoms in input model, total deposited atom count, and total deposited model count + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer, modelId=modelId) + numHeavyAtomsModel = wD["numHeavyAtomsModel"] if "numHeavyAtomsModel" in wD else 0 + numHydrogenAtomsModel = wD["numHydrogenAtomsModel"] if "numHydrogenAtomsModel" in wD else 0 + numAtomsTotal = wD["numAtomsAll"] if "numAtomsAll" in wD else 0 + numModelsTotal = wD["numModels"] if "numModels" in wD else 0 + return numHeavyAtomsModel, numHydrogenAtomsModel, numAtomsTotal, numModelsTotal + + def getInstancePolymerRanges(self, dataContainer): + """Return a dictionary of polymer residue range and length for each entity instance. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {"asymId": , {"sampleSeqLen": sampleSeqLen, + "obsSeqLen": obsSeqLen, + "begSeqId": begSeqId, + "endSeqId": endSeqId, + "begAuthSeqId": begAuthSeqId, + "endAuthSeqId": endAuthSeqId, + "begInsCode": begAuthInsCode, + "endInsCode": endAuthInsCode,}...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer) + return wD["asymIdPolymerRangesD"] if "asymIdPolymerRangesD" in wD else {} + + def getInstanceIdMap(self, dataContainer): + """Return a dictionary of cardinal identifiers for each entity instance. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {"asymId": {"entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "comp_id": monId, + "auth_seq_id": "?",}, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer) + return wD["instanceIdMapD"] if "instanceIdMapD" in wD else {} + + def getNonPolymerIdMap(self, dataContainer): + """Return a dictionary of cardinal identifiers for each non-polymer entity instance. 
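+            Keys are (auth_asym_id, auth_seq_id) tuples; for example (illustrative
+            only) ("A", "401") for a ligand at author residue 401 of chain A.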
+ + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {(authAsymId, resNum): {"entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "comp_id": monId, + "auth_seq_id": resNum, + }, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer) + return wD["npAuthAsymIdMapD"] if "npAuthAsymIdMapD" in wD else {} + + def getPolymerIdMap(self, dataContainer): + """Return a dictionary of cardinal identifiers for each polymer entity instance. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {(authAsymId, authSeqId, insCode): { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "comp_id": compId, + }, ... } + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer) + return wD["pAuthAsymIdMapD"] if "pAuthAsymIdMapD" in wD else {} + + def getBranchedIdMap(self, dataContainer): + """Return a dictionary of cardinal identifiers for each branched entity instance. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {(authAsymId, authSeqNum): { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "comp_id": monId, + "auth_seq_id": authSeqNum, + "seq_num": seqNum, + }, ...} + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer) + return wD["brAuthAsymIdMapD"] if "brAuthAsymIdMapD" in wD else {} + + def getEntityTypeUniqueIds(self, dataContainer): + """Return a nested dictionary of selected unique identifiers for entity types. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: [][] = {'asymIds': [...],'authAsymIds': [...], 'ccIds': [...]} + + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer) + return wD["entityTypeUniqueIds"] if "entityTypeUniqueIds" in wD else {} + + def getAuthToSeqIdMap(self, dataContainer): + """Return an instance (asymId) dictionary of auth to entity residue sequence mapping + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: seqIdMapAsymD[asymId] = [, ... ] + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchAtomSiteInfo(dataContainer) + return wD["seqIdMapAsymD"] if "seqIdMapAsymD" in wD else {} + + def __fetchAtomSiteInfo(self, dataContainer, modelId="1"): + wD = self.__atomInfoCache.get((dataContainer.getName(), modelId)) + if not wD: + wD = self.__getAtomSiteInfo(dataContainer, modelId=modelId) + self.__atomInfoCache.set((dataContainer.getName(), modelId), wD) + return wD + + def __getAtomSiteInfo(self, dataContainer, modelId="1"): + """Get counting information for each instance in the deposited coordinates for the input model. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + modelId (str, optional): model index. Defaults to "1". + + Returns: + (dict): with atom site counting and instance mapping details. 
+ + For instance, the following are calculated: + + instanceHeavyAtomCountD[asymId]: number of deposited heavy atoms + typeHeavyAtomCountD[entity type]: number of deposited heavy atoms + + numHeavyAtomsModel: number of deposited heavy atoms in input model_id + modelId: modelId + + instancePolymerModeledMonomerCountD[asymId]: number modeled polymer monomers in deposited coordinates + instancePolymerUnmodeledMonomerCountD[asymId]: number of polymer unmodeled monomers in deposited coordinates + + numModels: total number of deposited models + numAtomsAll: total number of deposited atoms + + asymAuthIdD = {asymId: authAsymId, ... } + + asymIdPolymerRangesD = {asymId: {"sampleSeqLen": sampleSeqLen, + "obsSeqLen": obsSeqLen, + "begSeqId": begSeqId, + "endSeqId": endSeqId, + "begAuthSeqId": begAuthSeqId, + "endAuthSeqId": endAuthSeqId, + "begInsCode": begAuthInsCode, + "endInsCode": endAuthInsCode,}, ...} + instanceIdMapD = {asymId: {"entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "comp_id": monId, + "auth_seq_id": "?",}, ...} + + pAuthAsymIdMapD[(authAsymId, authSeqId, insCode)] = { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "comp_id": compId, + "seq_id": seqId, + } + + npAuthAsymIdMapD[(authAsymId, resNum)] = { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "comp_id": monId, + "auth_seq_id": resNum, + } + + brAuthAsymIdMapD[(authAsymId, authSeqNum)] = { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "comp_id": monId, + "auth_seq_id": authSeqNum, + "seq_num": seqNum, + } + entityTypeUniqueIds[][] = {'asymIds': [...],'authAsymIds': [...], 'ccIds': [...]} + + seqIdMapAsymD[asymId] = [, ... ] + + """ + # + numAtomsAll = 0 + numHeavyAtomsModel = 0 + typeHeavyAtomCountD = {} + instanceHeavyAtomCountD = {} + # + numHydrogenAtomsModel = 0 + typeHydrogenAtomCountD = {} + instanceHydrogenAtomCountD = {} + # + instancePolymerModeledMonomerCountD = {} + instancePolymerUnmodeledMonomerCountD = {} + atomSiteInfoD = {} + modelIdL = [] + asymAuthIdD = {} + instanceTypeD = self.getInstanceTypes(dataContainer) + entityTypeD = self.getEntityTypes(dataContainer) + # + eObj = dataContainer.getObj("entity") + entityIdL = eObj.getAttributeValueList("id") + # + try: + if dataContainer.exists("atom_site"): + tObj = dataContainer.getObj("atom_site") + # All atoms all types deposited - + numAtomsAll = tObj.getRowCount() + # Heavy atoms per model - + cndL = [("type_symbol", "not in", ["H", "D", "T"]), ("pdbx_PDB_model_num", "eq", modelId)] + numHeavyAtomsModel = tObj.countValuesWhereOpConditions(cndL) + # + modelIdL = tObj.getAttributeUniqueValueList("pdbx_PDB_model_num") + cD = tObj.getCombinationCountsWithConditions(["label_asym_id", "pdbx_PDB_model_num"], [("type_symbol", "not in", ["H", "D", "T"])]) + # + for asymId, _ in instanceTypeD.items(): + instanceHeavyAtomCountD[asymId] = cD[(asymId, modelId)] if (asymId, modelId) in cD else 0 + # + # for eType in ['polymer', 'non-polymer', 'branched', 'macrolide', 'solvent']: + typeHeavyAtomCountD = {k: 0 for k in ["polymer", "non-polymer", "branched", "macrolide", "water"]} + for asymId, aCount in instanceHeavyAtomCountD.items(): + tt = instanceTypeD[asymId] + typeHeavyAtomCountD[tt] += aCount + + # Hydrogen counts ... 
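+                # The hydrogen tallies below mirror the heavy-atom logic above with
+                # the type_symbol membership test inverted (H/D/T selected rather
+                # than excluded): per-model totals via countValuesWhereOpConditions()
+                # and per-instance totals from the (label_asym_id, model) counts.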
+ cndL = [("type_symbol", "in", ["H", "D", "T"]), ("pdbx_PDB_model_num", "eq", modelId)] + numHydrogenAtomsModel = tObj.countValuesWhereOpConditions(cndL) + # + cD = tObj.getCombinationCountsWithConditions(["label_asym_id", "pdbx_PDB_model_num"], [("type_symbol", "in", ["H", "D", "T"])]) + for asymId, _ in instanceTypeD.items(): + instanceHydrogenAtomCountD[asymId] = cD[(asymId, modelId)] if (asymId, modelId) in cD else 0 + # + typeHydrogenAtomCountD = {k: 0 for k in ["polymer", "non-polymer", "branched", "macrolide", "water"]} + for asymId, aCount in instanceHydrogenAtomCountD.items(): + tt = instanceTypeD[asymId] + typeHydrogenAtomCountD[tt] += aCount + # + else: + logger.warning("Missing atom_site category for %s", dataContainer.getName()) + # + numModels = len(modelIdL) + if numModels < 1: + logger.warning("Missing model details in atom_site category for %s", dataContainer.getName()) + # + atomSiteInfoD = { + "instanceHeavyAtomCountD": instanceHeavyAtomCountD, + "typeHeavyAtomCountD": typeHeavyAtomCountD, + "numAtomsAll": numAtomsAll, + "numHeavyAtomsModel": numHeavyAtomsModel, + "numModels": len(modelIdL), + "modelId": modelId, + "modelIdList": sorted(modelIdL), + "instancePolymerModeledMonomerCountD": {}, + "instancePolymerUnmodeledMonomerCountD": {}, + "instanceHydrogenAtomCountD": instanceHydrogenAtomCountD, + "typeHydrogenAtomCountD": typeHydrogenAtomCountD, + "numHydrogenAtomsModel": numHydrogenAtomsModel, + } + except Exception as e: + logger.exception("Failing with %r with %r", dataContainer.getName(), str(e)) + + # + entityTypeUniqueIds = {} + tAsymIdD = {} + seqIdObsMapD = {} + seqIdMapAsymD = {} + epLengthD = self.getPolymerEntityLengths(dataContainer) + asymIdPolymerRangesD = {} + instanceIdMapD = {} + npAuthAsymIdMapD = {} + pAuthAsymIdMapD = {} + brAuthAsymIdMapD = {} + try: + eObj = dataContainer.getObj("entry") + entryId = eObj.getValue("id", 0) + # + psObj = dataContainer.getObj("pdbx_poly_seq_scheme") + if psObj is not None: + # -- + for eId in entityIdL: + if entityTypeD[eId] in ["polymer"]: + tAsymIdL = psObj.selectValuesWhere("asym_id", eId, "entity_id") + tAuthAsymIdL = psObj.selectValuesWhere("pdb_strand_id", eId, "entity_id") + tCcIdL = psObj.selectValuesWhere("mon_id", eId, "entity_id") + entityTypeUniqueIds.setdefault(entityTypeD[eId], {}).setdefault(eId, {"asymIds": tAsymIdL, "authAsymIds": tAuthAsymIdL, "ccIds": tCcIdL}) + # --- + aSeqD = {} + aOrgSeqD = {} + for ii in range(psObj.getRowCount()): + asymId = psObj.getValue("asym_id", ii) + # authSeqId = psObj.getValue("auth_seq_num", ii) + authSeqId = psObj.getValue("pdb_seq_num", ii) + authOrgSeqId = psObj.getValue("auth_seq_num", ii) + seqId = psObj.getValue("seq_id", ii) + compId = psObj.getValue("mon_id", ii) + entityId = psObj.getValue("entity_id", ii) + authAsymId = psObj.getValue("pdb_strand_id", ii) + # + insCode = psObj.getValueOrDefault("pdb_ins_code", ii, defaultValue=None) + aSeqD.setdefault(asymId, []).append(authSeqId) + aOrgSeqD.setdefault(asymId, []).append(authOrgSeqId) + # --- + tC = authSeqId + if authSeqId not in [".", "?"]: + seqIdObsMapD.setdefault(asymId, {})[seqId] = (authSeqId, insCode) + else: + tC = "?" 
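+                    # tC carries the author residue number ("?" when unmodeled);
+                    # an insertion code, if any, is appended below before the value
+                    # is accumulated per asymId in seqIdMapAsymD.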
+ if insCode and tC != "?": + tC += insCode + seqIdMapAsymD.setdefault(asymId, []).append(tC) + # --- + # + pAuthAsymIdMapD[(authAsymId, authSeqId, insCode)] = { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "comp_id": compId, + "seq_id": seqId, + } + # + if asymId in tAsymIdD: + continue + tAsymIdD[asymId] = entityId + asymAuthIdD[asymId] = authAsymId + # + instanceIdMapD[asymId] = { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "rcsb_id": entryId + "." + asymId, + "comp_id": "?", + "auth_seq_id": "?", + } + # + + # + # Get the modeled and unmodeled monomer counts by asymId + # JDW not use aOrgSeqD.items() + for asymId, sL in aOrgSeqD.items(): + instancePolymerModeledMonomerCountD[asymId] = len([t for t in sL if t not in ["?", "."]]) + instancePolymerUnmodeledMonomerCountD[asymId] = len([t for t in sL if t in ["?", "."]]) + # Get polymer range details for each polymer instance + for asymId, entityId in tAsymIdD.items(): + sampleSeqLen = epLengthD[entityId] if entityId in epLengthD else None + sL = list(seqIdObsMapD[asymId].items()) + begSeqId, (begAuthSeqId, begAuthInsCode) = sL[0] + endSeqId, (endAuthSeqId, endAuthInsCode) = sL[-1] + obsSeqLen = len(sL) + # + asymIdPolymerRangesD[asymId] = { + "sampleSeqLen": sampleSeqLen, + "obsSeqLen": obsSeqLen, + "begSeqId": begSeqId, + "endSeqId": endSeqId, + "begAuthSeqId": begAuthSeqId, + "endAuthSeqId": endAuthSeqId, + "begInsCode": begAuthInsCode, + "endInsCode": endAuthInsCode, + } + atomSiteInfoD["instancePolymerModeledMonomerCountD"] = instancePolymerModeledMonomerCountD + atomSiteInfoD["instancePolymerUnmodeledMonomerCountD"] = instancePolymerUnmodeledMonomerCountD + atomSiteInfoD["asymAuthIdD"] = asymAuthIdD + atomSiteInfoD["asymIdPolymerRangesD"] = asymIdPolymerRangesD + atomSiteInfoD["seqIdMapAsymD"] = seqIdMapAsymD + # -------------- + logger.debug( + "%s instancePolymerModeledMonomerCountD(%d) %r", + dataContainer.getName(), + sum(atomSiteInfoD["instancePolymerModeledMonomerCountD"].values()), + atomSiteInfoD["instancePolymerModeledMonomerCountD"], + ) + logger.debug("%s instancePolymerUnmodeledMonomerCountD %r", dataContainer.getName(), atomSiteInfoD["instancePolymerUnmodeledMonomerCountD"]) + # + # -------------- -------------- -------------- -------------- -------------- -------------- -------------- -------------- + # Add nonpolymer instance mapping + # + npsObj = dataContainer.getObj("pdbx_nonpoly_scheme") + if npsObj is not None: + # -- + for eId in entityIdL: + if entityTypeD[eId] in ["non-polymer", "water"]: + tAsymIdL = npsObj.selectValuesWhere("asym_id", eId, "entity_id") + tAuthAsymIdL = npsObj.selectValuesWhere("pdb_strand_id", eId, "entity_id") + tCcIdL = npsObj.selectValuesWhere("mon_id", eId, "entity_id") + entityTypeUniqueIds.setdefault(entityTypeD[eId], {}).setdefault(eId, {"asymIds": tAsymIdL, "authAsymIds": tAuthAsymIdL, "ccIds": tCcIdL}) + # --- + for ii in range(npsObj.getRowCount()): + asymId = npsObj.getValue("asym_id", ii) + entityId = npsObj.getValue("entity_id", ii) + authAsymId = npsObj.getValue("pdb_strand_id", ii) + resNum = npsObj.getValue("pdb_seq_num", ii) + monId = npsObj.getValue("mon_id", ii) + asymAuthIdD[asymId] = authAsymId + if asymId not in instanceIdMapD: + instanceIdMapD[asymId] = { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "rcsb_id": 
entryId + "." + asymId, + "comp_id": monId, + "auth_seq_id": resNum, + } + npAuthAsymIdMapD[(authAsymId, resNum)] = { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "comp_id": monId, + "auth_seq_id": resNum, + } + + # --------- + brsObj = dataContainer.getObj("pdbx_branch_scheme") + if brsObj is not None: + # -- + for eId in entityIdL: + if entityTypeD[eId] in ["branched"]: + tAsymIdL = brsObj.selectValuesWhere("asym_id", eId, "entity_id") + # changed to pdb_asym_id on 2020-07-29 + tAuthAsymIdL = brsObj.selectValuesWhere("pdb_asym_id", eId, "entity_id") + tCcIdL = brsObj.selectValuesWhere("mon_id", eId, "entity_id") + entityTypeUniqueIds.setdefault(entityTypeD[eId], {}).setdefault(eId, {"asymIds": tAsymIdL, "authAsymIds": tAuthAsymIdL, "ccIds": tCcIdL}) + # --- + for ii in range(brsObj.getRowCount()): + asymId = brsObj.getValue("asym_id", ii) + entityId = brsObj.getValue("entity_id", ii) + # + authAsymId = brsObj.getValue("pdb_asym_id", ii) + authSeqNum = brsObj.getValue("pdb_seq_num", ii) + monId = brsObj.getValue("mon_id", ii) + seqNum = brsObj.getValue("num", ii) + asymAuthIdD[asymId] = authAsymId + if asymId not in instanceIdMapD: + instanceIdMapD[asymId] = { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "rcsb_id": entryId + "." + asymId, + "comp_id": monId, + "auth_seq_id": "?", + } + brAuthAsymIdMapD[(authAsymId, authSeqNum)] = { + "entry_id": entryId, + "entity_id": entityId, + "entity_type": entityTypeD[entityId], + "asym_id": asymId, + "auth_asym_id": authAsymId, + "comp_id": monId, + "auth_seq_id": authSeqNum, + "seq_num": seqNum, + } + + # + atomSiteInfoD["instanceIdMapD"] = instanceIdMapD + atomSiteInfoD["npAuthAsymIdMapD"] = npAuthAsymIdMapD + atomSiteInfoD["pAuthAsymIdMapD"] = pAuthAsymIdMapD + atomSiteInfoD["brAuthAsymIdMapD"] = brAuthAsymIdMapD + atomSiteInfoD["entityTypeUniqueIds"] = entityTypeUniqueIds + + except Exception as e: + logger.exception("Failing for %s with %s", dataContainer.getName(), str(e)) + + # + return atomSiteInfoD + + def getProtHelixFeatures(self, dataContainer): + """Return a dictionary protein helical features (entity/label sequence coordinates). + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {: (asymId, begSeqId, endSeqId), ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchProtSecStructFeatures(dataContainer) + return wD["helixRangeD"] if "helixRangeD" in wD else {} + + def getProtUnassignedSecStructFeatures(self, dataContainer): + """Return a dictionary protein regions lacking SS feature assignments (entity/label sequence coordinates). + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {: (asymId, begSeqId, endSeqId), ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchProtSecStructFeatures(dataContainer) + return wD["unassignedRangeD"] if "unassignedRangeD" in wD else {} + + def getProtSheetFeatures(self, dataContainer): + """Return a dictionary protein beta strand features (entity/label sequence coordinates). 
+ + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {: {asymId: [(begSeqId, endSeqId), ...], } + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchProtSecStructFeatures(dataContainer) + return wD["instSheetRangeD"] if "instSheetRangeD" in wD else {} + + def getProtSheetSense(self, dataContainer): + """Return a dictionary protein beta strand sense . + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {: mixed|parallel|anti-parallel, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchProtSecStructFeatures(dataContainer) + return wD["senseTypeD"] if "senseTypeD" in wD else {} + + def getCisPeptides(self, dataContainer): + """Return a dictionary cis-peptides linkages using standard nomenclature. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {: (begAsymId, begSeqId, endSeqId, modelId, omegaAngle), ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchProtSecStructFeatures(dataContainer) + return wD["cisPeptideD"] if "cisPeptideD" in wD else {} + + def __fetchProtSecStructFeatures(self, dataContainer): + wD = self.__protSSCache.get(dataContainer.getName()) + if not wD: + wD = self.getProtSecStructFeatures(dataContainer) + self.__protSSCache.set(dataContainer.getName(), wD) + return wD + + def getProtSecStructFeatures(self, dataContainer): + """Get secondary structure features using standard nomenclature. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + (dict): with secondary structuree details + + For instance, the following are calculated: + { + "helixCountD": {}, + "sheetStrandCountD": {}, + "unassignedCountD": {}, + "helixLengthD": {}, + "sheetStrandLengthD": {}, + "unassignedLengthD": {}, + "helixFracD": {}, + "sheetStrandFracD": {}, + "unassignedFracD": {}, + "sheetSenseD": {}, + "sheetFullStrandCountD": {}, + "featureMonomerSequenceD": {}, + "featureSequenceD": {}, + # + "unassignedRangeD": {}, + "helixRangeD": {}, + "instHelixD": {}, + "sheetRangeD": {}, + "instSheetD": {}, + "senseTypeD": {} + "cisPeptideD": {}, + } + + # -- Target data categories --- + loop_ + _struct_conf.conf_type_id + _struct_conf.id + _struct_conf.pdbx_PDB_helix_id + _struct_conf.beg_label_comp_id + _struct_conf.beg_label_asym_id + _struct_conf.beg_label_seq_id + _struct_conf.pdbx_beg_PDB_ins_code + _struct_conf.end_label_comp_id + _struct_conf.end_label_asym_id + _struct_conf.end_label_seq_id + _struct_conf.pdbx_end_PDB_ins_code + + _struct_conf.beg_auth_comp_id + _struct_conf.beg_auth_asym_id + _struct_conf.beg_auth_seq_id + _struct_conf.end_auth_comp_id + _struct_conf.end_auth_asym_id + _struct_conf.end_auth_seq_id + _struct_conf.pdbx_PDB_helix_class + _struct_conf.details + _struct_conf.pdbx_PDB_helix_length + HELX_P HELX_P1 AA1 SER A 5 ? LYS A 19 ? SER A 2 LYS A 16 1 ? 15 + HELX_P HELX_P2 AA2 GLU A 26 ? LYS A 30 ? GLU A 23 LYS A 27 5 ? 5 + HELX_P HELX_P3 AA3 GLY A 47 ? LYS A 60 ? GLY A 44 LYS A 57 1 ? 14 + HELX_P HELX_P4 AA4 ASP A 111 ? LEU A 125 ? ASP A 108 LEU A 122 1 ? 15 + # + _struct_conf_type.id HELX_P + _struct_conf_type.criteria ? + _struct_conf_type.reference ? 
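+            (Only rows with conf_type_id HELX_P contribute helix ranges below;
+            other struct_conf conformation types fall into the unassigned tally.)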
+ # ------------------------------------------------------------------- + + loop_ + _struct_asym.id + _struct_asym.pdbx_blank_PDB_chainid_flag + _struct_asym.pdbx_modified + _struct_asym.entity_id + _struct_asym.details + A N N 1 ? + B N N 1 ? + # + _struct_sheet.id A + _struct_sheet.type ? + _struct_sheet.number_strands 8 + _struct_sheet.details ? + # + loop_ + _struct_sheet_order.sheet_id + _struct_sheet_order.range_id_1 + _struct_sheet_order.range_id_2 + _struct_sheet_order.offset + _struct_sheet_order.sense + A 1 2 ? anti-parallel + A 2 3 ? anti-parallel + A 3 4 ? anti-parallel + A 4 5 ? anti-parallel + A 5 6 ? anti-parallel + A 6 7 ? anti-parallel + A 7 8 ? anti-parallel + # + loop_ + _struct_sheet_range.sheet_id + _struct_sheet_range.id + _struct_sheet_range.beg_label_comp_id + _struct_sheet_range.beg_label_asym_id + _struct_sheet_range.beg_label_seq_id + _struct_sheet_range.pdbx_beg_PDB_ins_code + _struct_sheet_range.end_label_comp_id + _struct_sheet_range.end_label_asym_id + _struct_sheet_range.end_label_seq_id + _struct_sheet_range.pdbx_end_PDB_ins_code + + _struct_sheet_range.beg_auth_comp_id + _struct_sheet_range.beg_auth_asym_id + _struct_sheet_range.beg_auth_seq_id + _struct_sheet_range.end_auth_comp_id + _struct_sheet_range.end_auth_asym_id + _struct_sheet_range.end_auth_seq_id + A 1 LYS A 5 ? VAL A 8 ? LYS A 5 VAL A 8 + A 2 ARG A 11 ? THR A 16 ? ARG A 11 THR A 16 + A 3 VAL A 19 ? LEU A 26 ? VAL A 19 LEU A 26 + A 4 TYR A 29 ? ALA A 35 ? TYR A 29 ALA A 35 + A 5 TYR B 29 ? ALA B 35 ? TYR B 29 ALA B 35 + A 6 VAL B 19 ? LEU B 26 ? VAL B 19 LEU B 26 + A 7 ARG B 11 ? THR B 16 ? ARG B 11 THR B 16 + A 8 LYS B 5 ? VAL B 8 ? LYS B 5 VAL B 8 + # + _struct_mon_prot_cis.pdbx_id 1 + _struct_mon_prot_cis.label_comp_id ASN + _struct_mon_prot_cis.label_seq_id 189 + _struct_mon_prot_cis.label_asym_id C + _struct_mon_prot_cis.label_alt_id . + _struct_mon_prot_cis.pdbx_PDB_ins_code ? + _struct_mon_prot_cis.auth_comp_id ASN + _struct_mon_prot_cis.auth_seq_id 2007 + _struct_mon_prot_cis.auth_asym_id 2 + + _struct_mon_prot_cis.pdbx_label_comp_id_2 PRO + _struct_mon_prot_cis.pdbx_label_seq_id_2 190 + _struct_mon_prot_cis.pdbx_label_asym_id_2 C + _struct_mon_prot_cis.pdbx_PDB_ins_code_2 ? 
+ _struct_mon_prot_cis.pdbx_auth_comp_id_2 PRO + _struct_mon_prot_cis.pdbx_auth_seq_id_2 2008 + _struct_mon_prot_cis.pdbx_auth_asym_id_2 2 + + _struct_mon_prot_cis.pdbx_PDB_model_num 1 + _struct_mon_prot_cis.pdbx_omega_angle -6.45 + """ + rD = { + "helixCountD": {}, + "sheetStrandCountD": {}, + "unassignedCountD": {}, + "helixLengthD": {}, + "sheetStrandLengthD": {}, + "unassignedLengthD": {}, + "helixFracD": {}, + "sheetStrandFracD": {}, + "unassignedFracD": {}, + "sheetSenseD": {}, + "sheetFullStrandCountD": {}, + "featureMonomerSequenceD": {}, + "featureSequenceD": {}, + # + "unassignedRangeD": {}, + "helixRangeD": {}, + "instHelixD": {}, + "sheetRangeD": {}, + "instSheetD": {}, + "senseTypeD": {}, + "cisPeptideD": {}, + } + try: + instancePolymerTypeD = self.getInstancePolymerTypes(dataContainer) + instEntityD = self.getInstanceEntityMap(dataContainer) + epLengthD = self.getPolymerEntityLengths(dataContainer) + # + helixRangeD = {} + sheetRangeD = {} + sheetSenseD = {} + unassignedRangeD = {} + cisPeptideD = OrderedDict() + # + if dataContainer.exists("struct_mon_prot_cis"): + tObj = dataContainer.getObj("struct_mon_prot_cis") + for ii in range(tObj.getRowCount()): + cId = tObj.getValue("pdbx_id", ii) + begAsymId = tObj.getValue("label_asym_id", ii) + # begCompId = tObj.getValue("label_comp_id", ii) + begSeqId = int(tObj.getValue("label_seq_id", ii)) + endAsymId = tObj.getValue("pdbx_label_asym_id_2", ii) + # endCompId = int(tObj.getValue("pdbx_label_comp_id_2", ii)) + endSeqId = int(tObj.getValue("pdbx_label_seq_id_2", ii)) + modelId = int(tObj.getValue("pdbx_PDB_model_num", ii)) + omegaAngle = float(tObj.getValue("pdbx_omega_angle", ii)) + # + if (begAsymId == endAsymId) and (begSeqId <= endSeqId): + cisPeptideD.setdefault(cId, []).append((begAsymId, begSeqId, endSeqId, modelId, omegaAngle)) + else: + logger.debug("%s inconsistent cis peptide description id = %s", dataContainer.getName(), cId) + + if dataContainer.exists("struct_conf"): + tObj = dataContainer.getObj("struct_conf") + helixRangeD = OrderedDict() + for ii in range(tObj.getRowCount()): + confType = str(tObj.getValue("conf_type_id", ii)).strip().upper() + if confType in ["HELX_P"]: + hId = tObj.getValue("id", ii) + begAsymId = tObj.getValue("beg_label_asym_id", ii) + endAsymId = tObj.getValue("end_label_asym_id", ii) + try: + tbegSeqId = int(tObj.getValue("beg_label_seq_id", ii)) + tendSeqId = int(tObj.getValue("end_label_seq_id", ii)) + begSeqId = min(tbegSeqId, tendSeqId) + endSeqId = max(tbegSeqId, tendSeqId) + except Exception: + continue + if (begAsymId == endAsymId) and (begSeqId <= endSeqId): + helixRangeD.setdefault(hId, []).append((begAsymId, begSeqId, endSeqId)) + else: + logger.debug("%s inconsistent struct_conf description id = %s", dataContainer.getName(), hId) + + logger.debug("%s helixRangeD %r", dataContainer.getName(), helixRangeD.items()) + + if dataContainer.exists("struct_sheet_range"): + tObj = dataContainer.getObj("struct_sheet_range") + sheetRangeD = OrderedDict() + for ii in range(tObj.getRowCount()): + sId = tObj.getValue("sheet_id", ii) + begAsymId = tObj.getValue("beg_label_asym_id", ii) + endAsymId = tObj.getValue("end_label_asym_id", ii) + # Most obsolete entries do no define this + try: + tbegSeqId = int(tObj.getValue("beg_label_seq_id", ii)) + tendSeqId = int(tObj.getValue("end_label_seq_id", ii)) + begSeqId = min(tbegSeqId, tendSeqId) + endSeqId = max(tbegSeqId, tendSeqId) + except Exception: + continue + if (begAsymId == endAsymId) and (begSeqId <= endSeqId): + sheetRangeD.setdefault(sId, 
[]).append((begAsymId, begSeqId, endSeqId)) + else: + logger.debug("%s inconsistent struct_sheet_range description id = %s", dataContainer.getName(), sId) + + logger.debug("%s sheetRangeD %r", dataContainer.getName(), sheetRangeD.items()) + # + if dataContainer.exists("struct_sheet_order"): + tObj = dataContainer.getObj("struct_sheet_order") + # + sheetSenseD = OrderedDict() + for ii in range(tObj.getRowCount()): + sId = tObj.getValue("sheet_id", ii) + sense = str(tObj.getValue("sense", ii)).strip().lower() + sheetSenseD.setdefault(sId, []).append(sense) + # + logger.debug("%s sheetSenseD %r", dataContainer.getName(), sheetSenseD.items()) + # -------- + + unassignedCoverageD = {} + unassignedCountD = {} + unassignedLengthD = {} + unassignedFracD = {} + + helixCoverageD = {} + helixCountD = {} + helixLengthD = {} + helixFracD = {} + instHelixD = {} + + sheetCoverageD = {} + sheetStrandCountD = {} + sheetStrandLengthD = {} + strandsPerBetaSheetD = {} + sheetFullStrandCountD = {} + sheetStrandFracD = {} + instSheetD = {} + instSheetSenseD = {} + # + featureMonomerSequenceD = {} + featureSequenceD = {} + # + # ------------ + # Initialize over all protein instances + for asymId, filteredType in instancePolymerTypeD.items(): + if filteredType != "Protein": + continue + helixCoverageD[asymId] = [] + helixLengthD[asymId] = [] + helixCountD[asymId] = 0 + helixFracD[asymId] = 0.0 + instHelixD[asymId] = [] + # + sheetCoverageD[asymId] = [] + sheetStrandCountD[asymId] = 0 + sheetStrandLengthD[asymId] = [] + sheetFullStrandCountD[asymId] = [] + sheetStrandFracD[asymId] = 0.0 + instSheetD[asymId] = [] + instSheetSenseD[asymId] = [] + # + unassignedCountD[asymId] = 0 + unassignedLengthD[asymId] = [] + unassignedFracD[asymId] = 0.0 + # + featureMonomerSequenceD[asymId] = None + featureSequenceD[asymId] = None + # ------------- + # + for hId, hL in helixRangeD.items(): + for (asymId, begSeqId, endSeqId) in hL: + helixCoverageD.setdefault(asymId, []).extend(range(begSeqId, endSeqId + 1)) + helixLengthD.setdefault(asymId, []).append(abs(begSeqId - endSeqId) + 1) + helixCountD[asymId] = helixCountD[asymId] + 1 if asymId in helixCountD else 0 + instHelixD.setdefault(asymId, []).append(hId) + # + # --------- + # betaSheetCount = len(sheetRangeD) + # + for sId, sL in sheetRangeD.items(): + strandsPerBetaSheetD[sId] = len(sL) + for (asymId, begSeqId, endSeqId) in sL: + sheetCoverageD.setdefault(asymId, []).extend(range(begSeqId, endSeqId + 1)) + sheetStrandLengthD.setdefault(asymId, []).append(abs(begSeqId - endSeqId) + 1) + sheetStrandCountD[asymId] = sheetStrandCountD[asymId] + 1 if asymId in sheetStrandCountD else 0 + instSheetD.setdefault(asymId, []).append(sId) + # + instSheetRangeD = {} + for sId, sL in sheetRangeD.items(): + aD = {} + for (asymId, begSeqId, endSeqId) in sL: + aD.setdefault(asymId, []).append((begSeqId, endSeqId)) + instSheetRangeD[sId] = aD + # + # --------- + senseTypeD = {} + for sheetId, sL in sheetSenseD.items(): + if not sL: + continue + usL = list(set(sL)) + if len(usL) == 1: + senseTypeD[sheetId] = usL[0] + else: + senseTypeD[sheetId] = "mixed" + # --------- + # + for asymId, filteredType in instancePolymerTypeD.items(): + logger.debug("%s processing %s type %r", dataContainer.getName(), asymId, filteredType) + if filteredType != "Protein": + continue + entityId = instEntityD[asymId] + entityLen = epLengthD[entityId] + entityS = set(range(1, entityLen + 1)) + eLen = len(entityS) + # + helixS = set(helixCoverageD[asymId]) + sheetS = set(sheetCoverageD[asymId]) + commonS = helixS & 
sheetS + if commonS: + logger.debug("%s asymId %s overlapping secondary structure assignments for monomers %r", dataContainer.getName(), asymId, commonS) + # continue + + hLen = len(helixS) if asymId in helixCoverageD else 0 + sLen = len(sheetS) if asymId in sheetCoverageD else 0 + # + unassignedS = entityS - helixS if hLen else entityS + unassignedS = unassignedS - sheetS if sLen else unassignedS + tLen = len(unassignedS) + # + # if eLen != hLen + sLen + tLen: + # logger.warning("%s overlapping secondary structure assignments for asymId %s", dataContainer.getName(), asymId) + # continue + # + unassignedCoverageD[asymId] = list(unassignedS) + helixFracD[asymId] = float(hLen) / float(eLen) + sheetStrandFracD[asymId] = float(sLen) / float(eLen) + unassignedFracD[asymId] = float(tLen) / float(eLen) + # + unassignedRangeD[asymId] = list(self.__toRangeList(unassignedS)) + unassignedCountD[asymId] = len(unassignedRangeD[asymId]) + unassignedLengthD[asymId] = [abs(i - j) + 1 for (i, j) in unassignedRangeD[asymId]] + # + # ------ + sIdL = instSheetD[asymId] + # + instSheetSenseD[asymId] = [senseTypeD[sId] for sId in sIdL if sId in senseTypeD] + sheetFullStrandCountD[asymId] = [strandsPerBetaSheetD[sId] for sId in sIdL if sId in strandsPerBetaSheetD] + # + + # ------ + ssTypeL = ["_"] * eLen + if hLen: + for idx in helixCoverageD[asymId]: + ssTypeL[idx - 1] = "H" + if sLen: + for idx in sheetCoverageD[asymId]: + ssTypeL[idx - 1] = "S" + if tLen: + for idx in unassignedCoverageD[asymId]: + ssTypeL[idx - 1] = "_" + # + featureMonomerSequenceD[asymId] = "".join(ssTypeL) + featureSequenceD[asymId] = "".join([t[0] for t in itertools.groupby(ssTypeL)]) + # --------- + + rD = { + "helixCountD": helixCountD, + "sheetStrandCountD": sheetStrandCountD, + "unassignedCountD": unassignedCountD, + "helixLengthD": helixLengthD, + "sheetStrandLengthD": sheetStrandLengthD, + "unassignedLengthD": unassignedLengthD, + "helixFracD": helixFracD, + "sheetStrandFracD": sheetStrandFracD, + "unassignedFracD": unassignedFracD, + "sheetSenseD": instSheetSenseD, + "sheetFullStrandCountD": sheetFullStrandCountD, + "featureMonomerSequenceD": featureMonomerSequenceD, + "featureSequenceD": featureSequenceD, + # + "unassignedRangeD": unassignedRangeD, + "helixRangeD": helixRangeD, + "instHelixD": instHelixD, + # "sheetRangeD": sheetRangeD, + "instSheetRangeD": instSheetRangeD, + "instSheetD": instSheetD, + "senseTypeD": senseTypeD, + "cisPeptideD": cisPeptideD, + } + # self.__secondaryStructD = rD + # self.__setEntryCache(dataContainer.getName()) + except Exception as e: + logger.exception("Failing for %s with %s", dataContainer.getName(), str(e)) + # + return rD + + # Connection related + def getInstanceConnectionCounts(self, dataContainer): + """Return a dictionary instance connection counts. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {: #count, ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchInstanceConnections(dataContainer) + return wD["instConnectCountD"] if "instConnectCountD" in wD else {} + + def getInstanceConnections(self, dataContainer): + """Return a list of instance connections. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + list: [{"connect_type": , "connect_target_label_comp_id": , ... },...] 
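+                  For example (illustrative values only):
+                  [{"connect_type": "disulfide bridge",
+                    "connect_target_label_comp_id": "CYS",
+                    "connect_partner_label_comp_id": "CYS",
+                    "dist_value": "2.02", ...}, ...]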
+ + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchInstanceConnections(dataContainer) + return wD["instConnectL"] if "instConnectL" in wD else {} + + def getBoundNonpolymersComponentIds(self, dataContainer): + """Return a list of bound non-polymers in the entry. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {: NonpolymerBoundEntity("targetCompId", "connectType", "partnerCompId", "partnerEntityId", "partnerEntityType"), } + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchInstanceConnections(dataContainer) + return wD["boundNonpolymerComponentIdL"] if "boundNonpolymerComponentIdL" in wD else {} + + def getBoundNonpolymersByEntity(self, dataContainer): + """Return a dictonary of bound non-polymers by entity. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {: NonpolymerBoundEntity("targetCompId", "connectType", "partnerCompId", "partnerEntityId", "partnerEntityType"), } + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchInstanceConnections(dataContainer) + return wD["boundNonpolymerEntityD"] if "boundNonpolymerEntityD" in wD else {} + + def getBoundNonpolymersByInstance(self, dataContainer): + """Return a dictonary of bound non-polymers by instance. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {: NonpolymerBoundInstance("targetCompId", "connectType", "partnerCompId", "partnerAsymId", "partnerEntityType", "bondDistance", "bondOrder"), } + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchInstanceConnections(dataContainer) + return wD["boundNonpolymerInstanceD"] if "boundNonpolymerInstanceD" in wD else {} + + def __fetchInstanceConnections(self, dataContainer): + wD = self.__instanceConnectionCache.get(dataContainer.getName()) + if not wD: + wD = self.__getInstanceConnections(dataContainer) + self.__instanceConnectionCache.set(dataContainer.getName(), wD) + return wD + + def __getInstanceConnections(self, dataContainer): + """Get instance connections (e.g., intermolecular bonds and non-primary connectivity) + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: instConnectCountD{: count, ... } + + For instance, the following are calculated: + {Get counting information about intermolecular linkages. + covale . + disulf . + hydrog . + metalc + + loop_ + _struct_asym.id + _struct_asym.pdbx_blank_PDB_chainid_flag + _struct_asym.pdbx_modified + _struct_asym.entity_id + _struct_asym.details + A N N 1 ? + B N N 1 ? 
+ # + _struct_biol.id 1 + # + loop_ + _struct_conn.id + _struct_conn.conn_type_id + _struct_conn.pdbx_leaving_atom_flag + _struct_conn.pdbx_PDB_id + _struct_conn.ptnr1_label_asym_id + _struct_conn.ptnr1_label_comp_id + _struct_conn.ptnr1_label_seq_id + _struct_conn.ptnr1_label_atom_id + _struct_conn.pdbx_ptnr1_label_alt_id + _struct_conn.pdbx_ptnr1_PDB_ins_code + _struct_conn.pdbx_ptnr1_standard_comp_id + _struct_conn.ptnr1_symmetry + _struct_conn.ptnr2_label_asym_id + _struct_conn.ptnr2_label_comp_id + _struct_conn.ptnr2_label_seq_id + _struct_conn.ptnr2_label_atom_id + _struct_conn.pdbx_ptnr2_label_alt_id + _struct_conn.pdbx_ptnr2_PDB_ins_code + _struct_conn.ptnr1_auth_asym_id + _struct_conn.ptnr1_auth_comp_id + _struct_conn.ptnr1_auth_seq_id + _struct_conn.ptnr2_auth_asym_id + _struct_conn.ptnr2_auth_comp_id + _struct_conn.ptnr2_auth_seq_id + _struct_conn.ptnr2_symmetry + _struct_conn.pdbx_ptnr3_label_atom_id + _struct_conn.pdbx_ptnr3_label_seq_id + _struct_conn.pdbx_ptnr3_label_comp_id + _struct_conn.pdbx_ptnr3_label_asym_id + _struct_conn.pdbx_ptnr3_label_alt_id + _struct_conn.pdbx_ptnr3_PDB_ins_code + _struct_conn.details + _struct_conn.pdbx_dist_value + _struct_conn.pdbx_value_order + disulf1 disulf ? ? A CYS 31 SG ? ? ? 1_555 B CYS 31 SG ? ? A CYS 31 B CYS 31 1_555 ? ? ? ? ? ? ? 1.997 ? + covale1 covale ? ? A VAL 8 C ? ? ? 1_555 A DPR 9 N ? ? A VAL 8 A DPR 9 1_555 ? ? ? ? ? ? ? 1.360 ? + covale2 covale ? ? A DPR 9 C ? ? ? 1_555 A GLY 10 N ? ? A DPR 9 A GLY 10 1_555 ? ? ? ? ? ? ? 1.324 ? + # + """ + iAttMapD = { + "id": "id", + "connect_type": "conn_type_id", + "connect_target_label_comp_id": "ptnr1_label_comp_id", + "connect_target_label_asym_id": "ptnr1_label_asym_id", + "connect_target_label_seq_id": "ptnr1_label_seq_id", + "connect_target_label_atom_id": "ptnr1_label_atom_id", + "connect_target_label_alt_id": "pdbx_ptnr1_label_alt_id", + "connect_target_symmetry": "ptnr1_symmetry", + # + "connect_partner_label_comp_id": "ptnr2_label_comp_id", + "connect_partner_label_asym_id": "ptnr2_label_asym_id", + "connect_partner_label_seq_id": "ptnr2_label_seq_id", + "connect_partner_label_atom_id": "ptnr2_label_atom_id", + "connect_partner_label_alt_id": "pdbx_ptnr2_label_alt_id", + "connect_partner_symmetry": "ptnr2_symmetry", + "value_order": "pdbx_value_order", + "dist_value": "pdbx_dist_value", + "description": "details", + "role": "pdbx_role", + } + jAttMapD = { + "id": "id", + "connect_type": "conn_type_id", + "connect_target_label_comp_id": "ptnr2_label_comp_id", + "connect_target_label_asym_id": "ptnr2_label_asym_id", + "connect_target_label_seq_id": "ptnr2_label_seq_id", + "connect_target_label_atom_id": "ptnr2_label_atom_id", + "connect_target_label_alt_id": "pdbx_ptnr2_label_alt_id", + "connect_target_symmetry": "ptnr2_symmetry", + # + "connect_partner_label_comp_id": "ptnr1_label_comp_id", + "connect_partner_label_asym_id": "ptnr1_label_asym_id", + "connect_partner_label_seq_id": "ptnr1_label_seq_id", + "connect_partner_label_atom_id": "ptnr1_label_atom_id", + "connect_partner_label_alt_id": "pdbx_ptnr1_label_alt_id", + "connect_partner_symmetry": "ptnr1_symmetry", + "value_order": "pdbx_value_order", + "dist_value": "pdbx_dist_value", + "description": "details", + "role": "pdbx_role", + } + typeMapD = { + "covale": "covalent bond", + "disulf": "disulfide bridge", + "hydrog": "hydrogen bond", + "metalc": "metal coordination", + "mismat": "mismatched base pairs", + "saltbr": "ionic interaction", + "modres": "covalent residue modification", + "covale_base": "covalent 
modification of a nucleotide base", + "covale_sugar": "covalent modification of a nucleotide sugar", + "covale_phosphate": "covalent modification of a nucleotide phosphate", + } + # + instConnectL = [] + instConnectCountD = {ky: 0 for ky in typeMapD} + boundNonpolymerEntityD = {} + boundNonpolymerInstanceD = {} + boundNonpolymerComponentIdL = [] + # + if dataContainer.exists("struct_conn"): + tObj = dataContainer.getObj("struct_conn") + for ii in range(tObj.getRowCount()): + bt = str(tObj.getValue("conn_type_id", ii)).strip().lower() + if bt not in instConnectCountD: + logger.error("Unsupported intermolecular bond type %r in %r", bt, dataContainer.getName()) + continue + instConnectCountD[bt] = instConnectCountD[bt] + 1 if bt in instConnectCountD else instConnectCountD[bt] + # + tD = OrderedDict() + for ky, atName in iAttMapD.items(): + if tObj.hasAttribute(atName): + val = tObj.getValue(atName, ii) if atName != "conn_type_id" else typeMapD[tObj.getValue(atName, ii).lower()] + tD[ky] = val + instConnectL.append(tD) + # Flip the bond sense so all target connections are accounted for + tD = OrderedDict() + for ky, atName in jAttMapD.items(): + if tObj.hasAttribute(atName): + val = tObj.getValue(atName, ii) if atName != "conn_type_id" else typeMapD[tObj.getValue(atName, ii).lower()] + tD[ky] = val + instConnectL.append(tD) + + boundNonpolymerEntityD, boundNonpolymerInstanceD, boundNonpolymerComponentIdL = self.__getBoundNonpolymers(dataContainer, instConnectL) + + return { + "instConnectL": instConnectL, + "instConnectCountD": instConnectCountD, + "boundNonpolymerEntityD": boundNonpolymerEntityD, + "boundNonpolymerInstanceD": boundNonpolymerInstanceD, + "boundNonpolymerComponentIdL": boundNonpolymerComponentIdL, + } + + def __getBoundNonpolymers(self, dataContainer, instConnectL): + """Get nonpolymer bound + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + bool: True for success or False otherwise + + Example: + """ + logger.debug("Starting with %r", dataContainer.getName()) + # + boundNonpolymerEntityD = {} + boundNonpolymerInstanceD = {} + boundNonpolymerComponentIdL = [] + try: + cDL = instConnectL + asymIdD = self.getInstanceEntityMap(dataContainer) + # asymAuthIdD = self.getAsymAuthIdMap(dataContainer) + eTypeD = self.getEntityTypes(dataContainer) + # + ts = set() + for cD in cDL: + tAsymId = cD["connect_target_label_asym_id"] + tEntityId = asymIdD[tAsymId] + if eTypeD[tEntityId] == "non-polymer" and cD["connect_type"] in ["covale", "covalent bond", "metalc", "metal coordination"]: + pAsymId = cD["connect_partner_label_asym_id"] + pEntityId = asymIdD[pAsymId] + pCompId = cD["connect_partner_label_comp_id"] + pSeqId = cD["connect_partner_label_seq_id"] + tCompId = cD["connect_target_label_comp_id"] + bondOrder = cD["value_order"] + bondDist = cD["dist_value"] + pType = eTypeD[pEntityId] + # + ts.add(tCompId) + boundNonpolymerInstanceD.setdefault(tAsymId, []).append( + NonpolymerBoundInstance(tCompId, cD["connect_type"], pCompId, pAsymId, pType, pSeqId, bondDist, bondOrder) + ) + boundNonpolymerEntityD.setdefault(tEntityId, []).append(NonpolymerBoundEntity(tCompId, cD["connect_type"], pCompId, pEntityId, pType)) + # + for asymId in boundNonpolymerInstanceD: + boundNonpolymerInstanceD[asymId] = sorted(set(boundNonpolymerInstanceD[asymId])) + for entityId in boundNonpolymerEntityD: + boundNonpolymerEntityD[entityId] = sorted(set(boundNonpolymerEntityD[entityId])) + boundNonpolymerComponentIdL = sorted(ts) + except Exception as e: + 
logger.exception("%s failing with %s", dataContainer.getName(), str(e)) + return boundNonpolymerEntityD, boundNonpolymerInstanceD, boundNonpolymerComponentIdL + + def getEntitySequenceFeatureCounts(self, dataContainer): + """Return a dictionary of sequence feature counts. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {: {'mutation': #, 'artifact': #, 'conflict': #, ... }, } + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchSequenceFeatures(dataContainer) + return wD["seqFeatureCountsD"] if "seqFeatureCountsD" in wD else {} + + def getEntitySequenceMonomerFeatures(self, dataContainer): + """Return a dictionary of sequence monomer features. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {(entityId,seqId,compId,filteredFeature): {detail,detail}, .. } + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchSequenceFeatures(dataContainer) + return wD["seqMonomerFeatureD"] if "seqMonomerFeatureD" in wD else {} + + def getEntitySequenceRangeFeatures(self, dataContainer): + """Return a dictionary of sequence range features. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {(entityId,benSeqId,endSeqId,filteredFeature): {detail,detail}, .. } + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchSequenceFeatures(dataContainer) + return wD["seqRangeFeatureD"] if "seqRangeFeatureD" in wD else {} + + def getEntityReferenceAlignments(self, dataContainer): + """Return a dictionary of reference sequence alignments for each entity. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {entityId: {'dbName': , 'dbAccession': , 'authAsymId': , 'entitySeqIdBeg':, 'dbSeqIdBeg':, ... }, .. } + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchReferenceSequenceDetails(dataContainer) + return wD["seqEntityAlignmentD"] if "seqEntityAlignmentD" in wD else {} + + def getEntityPolymerSequences(self, dataContainer): + """Return a dictionary of the sequences (one-letter-codes) for each polymer entity. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {entityId: {'sequence': ..., 'polymerType': ... , 'polymerTypeFiltered': ... }, ... } + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchReferenceSequenceDetails(dataContainer) + return wD["entityPolymerSequenceD"] if "entityPolymerSequenceD" in wD else {} + + def getEntitySequenceReferenceCodes(self, dataContainer): + """Return a dictionary of reference database accession codes. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {entityId: {'dbName': , 'dbAccession': }, ... 
} + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchReferenceSequenceDetails(dataContainer) + return wD["seqEntityRefDbD"] if "seqEntityRefDbD" in wD else {} + + def __fetchSequenceFeatures(self, dataContainer): + wD = self.__entitySequenceFeatureCache.get(dataContainer.getName()) + if not wD: + wD = self.__getSequenceFeatures(dataContainer) + self.__entitySequenceFeatureCache.set(dataContainer.getName(), wD) + return wD + + def __fetchReferenceSequenceDetails(self, dataContainer): + wD = self.__entityReferenceSequenceDetailsCache.get(dataContainer.getName()) + if not wD: + wD = self.__getReferenceSequenceDetails(dataContainer) + self.__entityReferenceSequenceDetailsCache.set(dataContainer.getName(), wD) + return wD + + def getDatabaseNameMap(self): + dbNameMapD = { + "UNP": "UniProt", + "GB": "GenBank", + "PDB": "PDB", + "EMBL": "EMBL", + "GENP": "GenBank", + "NDB": "NDB", + "NOR": "NORINE", + "PIR": "PIR", + "PRF": "PRF", + "REF": "RefSeq", + "TPG": "GenBank", + "TREMBL": "UniProt", + "SWS": "UniProt", + "SWALL": "UniProt", + } + return dbNameMapD + + def __getReferenceSequenceDetails(self, dataContainer): + """Get reference sequence and related alignment details. + + Args: + dataContainer (object): mmif.api.DataContainer object instance + + Returns: + dict : { + "seqEntityAlignmentD" : {entityId: [{'dbName': 'UNP' , 'dbAccession': 'P000000', ... }]} + "seqEntityRefDbD": {entityId: [{'dbName': 'UNP' , 'dbAccession': 'P000000'), }]}, + } + + Example source content: + + _struct_ref.id 1 + _struct_ref.db_name UNP + _struct_ref.db_code KSYK_HUMAN + _struct_ref.pdbx_db_accession P43405 + _struct_ref.entity_id 1 + _struct_ref.pdbx_seq_one_letter_code + ;ADPEEIRPKEVYLDRKLLTLEDKELGSGNFGTVKKGYYQMKKVVKTVAVKILKNEANDPALKDELLAEANVMQQLDNPYI + VRMIGICEAESWMLVMEMAELGPLNKYLQQNRHVKDKNIIELVHQVSMGMKYLEESNFVHRDLAARNVLLVTQHYAKISD + FGLSKALRADENYYKAQTHGKWPVKWYAPECINYYKFSSKSDVWSFGVLMWEAFSYGQKPYRGMKGSEVTAMLEKGERMG + CPAGCPREMYDLMNLCWTYDVENRPGFAAVELRLRNYYYDVVN + ; + _struct_ref.pdbx_align_begin 353 + _struct_ref.pdbx_db_isoform ? + # + _struct_ref_seq.align_id 1 + _struct_ref_seq.ref_id 1 + _struct_ref_seq.pdbx_PDB_id_code 1XBB + _struct_ref_seq.pdbx_strand_id A + _struct_ref_seq.seq_align_beg 1 + _struct_ref_seq.pdbx_seq_align_beg_ins_code ? + _struct_ref_seq.seq_align_end 283 + _struct_ref_seq.pdbx_seq_align_end_ins_code ? + _struct_ref_seq.pdbx_db_accession P43405 + _struct_ref_seq.db_align_beg 353 + _struct_ref_seq.pdbx_db_align_beg_ins_code ? + _struct_ref_seq.db_align_end 635 + _struct_ref_seq.pdbx_db_align_end_ins_code ? + _struct_ref_seq.pdbx_auth_seq_align_beg 353 + _struct_ref_seq.pdbx_auth_seq_align_end 635 + _struct_ref_seq.rcsb_entity_id 1 + # + loop_ + _struct_ref_seq_dif.align_id + _struct_ref_seq_dif.pdbx_pdb_id_code + _struct_ref_seq_dif.mon_id + _struct_ref_seq_dif.pdbx_pdb_strand_id + _struct_ref_seq_dif.seq_num + _struct_ref_seq_dif.pdbx_pdb_ins_code + _struct_ref_seq_dif.pdbx_seq_db_name + _struct_ref_seq_dif.pdbx_seq_db_accession_code + _struct_ref_seq_dif.db_mon_id + _struct_ref_seq_dif.pdbx_seq_db_seq_num + _struct_ref_seq_dif.details + _struct_ref_seq_dif.pdbx_auth_seq_num + _struct_ref_seq_dif.pdbx_ordinal + _struct_ref_seq_dif.rcsb_entity_id + 1 1XBB MET A 1 ? UNP P43405 ALA 353 'CLONING ARTIFACT' 353 1 1 + 1 1XBB ALA A 2 ? UNP P43405 ASP 354 'CLONING ARTIFACT' 354 2 1 + 1 1XBB LEU A 3 ? UNP P43405 PRO 355 'CLONING ARTIFACT' 355 3 1 + 1 1XBB GLU A 284 ? UNP P43405 ? ? 'CLONING ARTIFACT' 636 4 1 + 1 1XBB GLY A 285 ? UNP P43405 ? ? 
'CLONING ARTIFACT' 637 5 1 + 1 1XBB HIS A 286 ? UNP P43405 ? ? 'EXPRESSION TAG' 638 6 1 + 1 1XBB HIS A 287 ? UNP P43405 ? ? 'EXPRESSION TAG' 639 7 1 + 1 1XBB HIS A 288 ? UNP P43405 ? ? 'EXPRESSION TAG' 640 8 1 + 1 1XBB HIS A 289 ? UNP P43405 ? ? 'EXPRESSION TAG' 641 9 1 + 1 1XBB HIS A 290 ? UNP P43405 ? ? 'EXPRESSION TAG' 642 10 1 + 1 1XBB HIS A 291 ? UNP P43405 ? ? 'EXPRESSION TAG' 643 11 1 + # + # + loop_ + _struct_ref_seq_dif.align_id + _struct_ref_seq_dif.pdbx_pdb_id_code + _struct_ref_seq_dif.mon_id + _struct_ref_seq_dif.pdbx_pdb_strand_id + _struct_ref_seq_dif.seq_num + _struct_ref_seq_dif.pdbx_pdb_ins_code + _struct_ref_seq_dif.pdbx_seq_db_name + _struct_ref_seq_dif.pdbx_seq_db_accession_code + _struct_ref_seq_dif.db_mon_id + _struct_ref_seq_dif.pdbx_seq_db_seq_num + _struct_ref_seq_dif.details + _struct_ref_seq_dif.pdbx_auth_seq_num + _struct_ref_seq_dif.pdbx_ordinal + _struct_ref_seq_dif.rcsb_entity_id + 1 3RIJ TYR A 53 ? UNP Q5SHN1 PHE 54 'ENGINEERED MUTATION' 54 1 1 + 1 3RIJ GLY A 54 ? UNP Q5SHN1 VAL 55 'ENGINEERED MUTATION' 55 2 1 + 2 3RIJ ASP A 98 ? UNP Q5SHN1 ALA 99 'ENGINEERED MUTATION' 99 3 1 + 2 3RIJ ALA A 99 ? UNP Q5SHN1 ILE 100 'ENGINEERED MUTATION' 100 4 1 + 2 3RIJ LEU A 158 ? UNP Q5SHN1 ? ? INSERTION 159 5 1 + 2 3RIJ GLU A 159 ? UNP Q5SHN1 ? ? INSERTION 160 6 1 + 2 3RIJ HIS A 160 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 161 7 1 + 2 3RIJ HIS A 161 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 162 8 1 + 2 3RIJ HIS A 162 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 163 9 1 + 2 3RIJ HIS A 163 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 164 10 1 + 2 3RIJ HIS A 164 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 165 11 1 + 2 3RIJ HIS A 165 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 166 12 1 + 3 3RIJ TYR B 53 ? UNP Q5SHN1 PHE 54 'ENGINEERED MUTATION' 54 13 1 + 3 3RIJ GLY B 54 ? UNP Q5SHN1 VAL 55 'ENGINEERED MUTATION' 55 14 1 + 4 3RIJ ASP B 98 ? UNP Q5SHN1 ALA 99 'ENGINEERED MUTATION' 99 15 1 + 4 3RIJ ALA B 99 ? UNP Q5SHN1 ILE 100 'ENGINEERED MUTATION' 100 16 1 + 4 3RIJ LEU B 158 ? UNP Q5SHN1 ? ? INSERTION 159 17 1 + 4 3RIJ GLU B 159 ? UNP Q5SHN1 ? ? INSERTION 160 18 1 + 4 3RIJ HIS B 160 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 161 19 1 + 4 3RIJ HIS B 161 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 162 20 1 + 4 3RIJ HIS B 162 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 163 21 1 + 4 3RIJ HIS B 163 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 164 22 1 + 4 3RIJ HIS B 164 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 165 23 1 + 4 3RIJ HIS B 165 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 166 24 1 + 5 3RIJ TYR C 53 ? UNP Q5SHN1 PHE 54 'ENGINEERED MUTATION' 54 25 1 + 5 3RIJ GLY C 54 ? UNP Q5SHN1 VAL 55 'ENGINEERED MUTATION' 55 26 1 + 6 3RIJ ASP C 98 ? UNP Q5SHN1 ALA 99 'ENGINEERED MUTATION' 99 27 1 + 6 3RIJ ALA C 99 ? UNP Q5SHN1 ILE 100 'ENGINEERED MUTATION' 100 28 1 + 6 3RIJ LEU C 158 ? UNP Q5SHN1 ? ? INSERTION 159 29 1 + 6 3RIJ GLU C 159 ? UNP Q5SHN1 ? ? INSERTION 160 30 1 + 6 3RIJ HIS C 160 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 161 31 1 + 6 3RIJ HIS C 161 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 162 32 1 + 6 3RIJ HIS C 162 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 163 33 1 + 6 3RIJ HIS C 163 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 164 34 1 + 6 3RIJ HIS C 164 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 165 35 1 + 6 3RIJ HIS C 165 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 166 36 1 + 7 3RIJ TYR D 53 ? UNP Q5SHN1 PHE 54 'ENGINEERED MUTATION' 54 37 1 + 7 3RIJ GLY D 54 ? UNP Q5SHN1 VAL 55 'ENGINEERED MUTATION' 55 38 1 + 8 3RIJ ASP D 98 ? UNP Q5SHN1 ALA 99 'ENGINEERED MUTATION' 99 39 1 + 8 3RIJ ALA D 99 ? UNP Q5SHN1 ILE 100 'ENGINEERED MUTATION' 100 40 1 + 8 3RIJ LEU D 158 ? UNP Q5SHN1 ? ? INSERTION 159 41 1 + 8 3RIJ GLU D 159 ? UNP Q5SHN1 ? ? 
INSERTION 160 42 1 + 8 3RIJ HIS D 160 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 161 43 1 + 8 3RIJ HIS D 161 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 162 44 1 + 8 3RIJ HIS D 162 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 163 45 1 + 8 3RIJ HIS D 163 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 164 46 1 + 8 3RIJ HIS D 164 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 165 47 1 + 8 3RIJ HIS D 165 ? UNP Q5SHN1 ? ? 'EXPRESSION TAG' 166 48 1 + # + """ + logger.debug("Starting with %r", dataContainer.getName()) + self.__addStructRefSeqEntityIds(dataContainer) + # + # To exclude self references - + excludeRefDbList = ["PDB"] + rD = {"seqEntityAlignmentD": {}, "seqEntityRefDbD": {}, "entityPolymerSeqenceD": {}} + try: + # Exit if source categories are missing + if not (dataContainer.exists("struct_ref_seq") and dataContainer.exists("struct_ref") and dataContainer.exists("entity_poly")): + return rD + # ------- --------- ------- --------- ------- --------- ------- --------- ------- --------- + entityPolymerSequenceD = {} + if dataContainer.exists("entity_poly"): + epObj = dataContainer.getObj("entity_poly") + for ii in range(epObj.getRowCount()): + entityId = epObj.getValue("entity_id", ii) + pType = epObj.getValue("type", ii) + pTypeFiltered = self.filterEntityPolyType(pType) + if epObj.hasAttribute("pdbx_seq_one_letter_code_can"): + sampleSeq = self.__stripWhiteSpace(epObj.getValue("pdbx_seq_one_letter_code_can", ii)) + if sampleSeq and sampleSeq not in ["?", "."]: + entityPolymerSequenceD[entityId] = {"sequence": sampleSeq, "polymerType": pType, "polymerTypeFiltered": pTypeFiltered} + # + srObj = None + if dataContainer.exists("struct_ref"): + srObj = dataContainer.getObj("struct_ref") + # + srsObj = None + if dataContainer.exists("struct_ref_seq"): + srsObj = dataContainer.getObj("struct_ref_seq") + + # srsdObj = None + # if dataContainer.exists("struct_ref_seq_dif"): + # srsdObj = dataContainer.getObj("struct_ref_seq_dif") + + polymerEntityTypeD = self.getPolymerEntityFilteredTypes(dataContainer) + # Map alignId -> entityId + seqEntityRefDbD = {} + tupSeqEntityRefDbD = {} + alignEntityMapD = {} + # entity alignment details + seqEntityAlignmentD = {} + for ii in range(srObj.getRowCount()): + dbAccessionAlignS = set() + entityId = srObj.getValue("entity_id", ii) + refId = srObj.getValue("id", ii) + dbName = str(srObj.getValue("db_name", ii)).strip().upper() + # + if dbName in excludeRefDbList: + continue + # + if entityId not in polymerEntityTypeD: + logger.debug("%s skipping non-polymer entity %r in sequence reference", dataContainer.getName(), entityId) + continue + + if dbName in ["UNP"] and polymerEntityTypeD[entityId] != "Protein": + logger.debug("%s skipping inconsistent reference assignment for %s polymer type %s", dataContainer.getName(), dbName, polymerEntityTypeD[entityId]) + continue + # + tS = srObj.getValue("pdbx_db_accession", ii) + dbAccession = tS if tS and tS not in [".", "?"] else None + # + tS = srObj.getValue("pdbx_db_isoform", ii) + dbIsoform = tS if tS and tS not in [".", "?"] else None + # Look for a stray isoform + if dbName in ["UNP"] and dbAccession and "-" in dbAccession: + if not dbIsoform: + dbIsoform = dbAccession + ff = dbAccession.split("-") + dbAccession = ff[0] + + # + if dbIsoform and dbAccession not in dbIsoform: + logger.debug("entryId %r entityId %r accession %r isoform %r inconsistency", dataContainer.getName(), entityId, dbAccession, dbIsoform) + # --- + # Get indices for the target refId. 
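+                # struct_ref rows (one per entity/reference pairing, keyed by refId)
+                # join to struct_ref_seq alignment rows via ref_id; one reference
+                # may contribute several alignment segments (e.g. one per chain).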
+ iRowL = srsObj.selectIndices(refId, "ref_id") + logger.debug("entryId %r entityId %r refId %r rowList %r", dataContainer.getName(), entityId, refId, iRowL) + entitySeqIdBeg = entitySeqIdEnd = 0 + for iRow in iRowL: + try: + entitySeqIdBeg = srsObj.getValue("seq_align_beg", iRow) + entitySeqIdEnd = srsObj.getValue("seq_align_end", iRow) + entityAlignLength = int(entitySeqIdEnd) - int(entitySeqIdBeg) + 1 + except Exception: + entityAlignLength = 0 + # + if entityAlignLength <= 0: + logger.debug("%s entity %r skipping bad alignment seqBeg %r seqEnd %r", dataContainer.getName(), entityId, entitySeqIdBeg, entitySeqIdEnd) + continue + + alignId = srsObj.getValue("align_id", iRow) + alignEntityMapD[alignId] = entityId + # + authAsymId = srsObj.getValue("pdbx_strand_id", iRow) + dbSeqIdBeg = srsObj.getValue("db_align_beg", iRow) + dbSeqIdEnd = srsObj.getValue("db_align_end", iRow) + # ---- + try: + idbSeqIdBeg = int(dbSeqIdBeg) + if idbSeqIdBeg == 0: + idbSeqIdBeg = 1 + dbSeqIdBeg = str(idbSeqIdBeg) + idbSeqIdEnd = int(dbSeqIdEnd) + idbSeqIdEnd += 1 + dbSeqIdEnd = str(idbSeqIdEnd) + logger.debug("%s offset reference sequence database position", dataContainer.getName()) + except Exception: + pass + # ---- + # + tS = srsObj.getValue("pdbx_db_accession", iRow) + # use the parent pdbx_accession + dbAccessionAlign = tS if tS and tS not in [".", "?"] else dbAccession + # Look for a stray isoform + if dbName in ["UNP"] and dbAccessionAlign and "-" in dbAccessionAlign: + if not dbIsoform: + dbIsoform = dbAccessionAlign + ff = dbAccessionAlign.split("-") + dbAccessionAlign = ff[0] + + dbAccessionAlignS.add(dbAccessionAlign) + # + # + seqEntityAlignmentD.setdefault(entityId, []).append( + SeqAlign( + "PDB", + **{ + "authAsymId": authAsymId, + "entitySeqIdBeg": entitySeqIdBeg, + "entitySeqIdEnd": entitySeqIdEnd, + "dbSeqIdBeg": dbSeqIdBeg, + "dbSeqIdEnd": dbSeqIdEnd, + "dbName": dbName, + "dbAccession": dbAccessionAlign, + "dbIsoform": dbIsoform, + "entityAlignLength": entityAlignLength, + }, + ) + ) + # Check consistency + try: + if len(dbAccessionAlignS) == 1 and list(dbAccessionAlignS)[0] == dbAccession: + tupSeqEntityRefDbD.setdefault(entityId, []).append((dbName, dbAccession, dbIsoform)) + elif len(dbAccessionAlignS) == 1 and list(dbAccessionAlignS)[0]: + tupSeqEntityRefDbD.setdefault(entityId, []).append((dbName, list(dbAccessionAlignS)[0], None)) + elif dbAccession: + tupSeqEntityRefDbD.setdefault(entityId, []).append((dbName, dbAccession, dbIsoform)) + else: + logger.debug("%s entityId %r inconsistent reference sequence %r %r", dataContainer.getName(), entityId, dbAccession, dbAccessionAlignS) + except Exception: + logger.exception("%s entityId %r inconsistent reference sequence %r %r", dataContainer.getName(), entityId, dbAccession, dbAccessionAlignS) + + # ----- + dbMapD = self.getDatabaseNameMap() + for entityId, tupL in tupSeqEntityRefDbD.items(): + uTupL = list(OrderedDict({tup: True for tup in tupL}).keys()) + for tup in uTupL: + tS = dbMapD[tup[0]] if tup[0] in dbMapD else tup[0] + if tup[1]: + seqEntityRefDbD.setdefault(entityId, []).append({"dbName": tS, "dbAccession": tup[1], "dbIsoform": tup[2]}) + else: + logger.debug("%s %s skipping incomplete sequence reference assignment %r", dataContainer.getName(), entityId, tup) + + return { + "seqEntityAlignmentD": seqEntityAlignmentD, + "seqEntityRefDbD": seqEntityRefDbD, + "entityPolymerSequenceD": entityPolymerSequenceD, + } + except Exception as e: + logger.exception("%s failing with %s", dataContainer.getName(), str(e)) + return rD + + def 
__getSequenceFeatures(self, dataContainer): + """Get point and range sequence features. + + Args: + dataContainer (object): mmif.api.DataContainer object instance + + Returns: + dict : {"seqFeatureCountsD": {entityId: {"mutation": #, "conflict": # ... }, } + "seqMonomerFeatureD": {(entityId, seqId, compId, filteredFeature): set(feature,...), ...} + "seqRangeFeatureD" : {(entityId, str(beg), str(end), "artifact"): set(details)} + } + + """ + logger.debug("Starting with %r", dataContainer.getName()) + self.__addStructRefSeqEntityIds(dataContainer) + # + # To exclude self references - + # excludeRefDbList = ["PDB"] + rD = {"seqFeatureCountsD": {}, "seqMonomerFeatureD": {}, "seqRangeFeatureD": {}} + try: + # Exit if source categories are missing + if not (dataContainer.exists("struct_ref_seq") and dataContainer.exists("struct_ref")): + return rD + # ------- --------- ------- --------- ------- --------- ------- --------- ------- --------- + # srObj = None + # if dataContainer.exists("struct_ref"): + # srObj = dataContainer.getObj("struct_ref") + # + # srsObj = None + # if dataContainer.exists("struct_ref_seq"): + # srsObj = dataContainer.getObj("struct_ref_seq") + + srsdObj = None + if dataContainer.exists("struct_ref_seq_dif"): + srsdObj = dataContainer.getObj("struct_ref_seq_dif") + + # polymerEntityTypeD = self.getPolymerEntityFilteredTypes(dataContainer) + # + # ------- --------- ------- --------- ------- --------- ------- --------- ------- --------- + # (entityId, seqId, compId, filteredFeature) -> set{details, ...} + # + seqFeatureCountsD = {} + seqMonomerFeatureD = {} + seqRangeFeatureD = {} + entityArtifactD = {} + seqIdDetailsD = {} + if srsdObj: + for ii in range(srsdObj.getRowCount()): + # alignId = srsdObj.getValue("align_id", ii) + # + # entityId = alignEntityMapD[alignId] + entityId = srsdObj.getValueOrDefault("rcsb_entity_id", ii, defaultValue=None) + if not entityId: + continue + # + # authAsymId = srsdObj.getValue("pdbx_pdb_strand_id", ii) + # dbName = srsdObj.getValue("pdbx_seq_db_name", ii) + # + # Can't rely on alignId + # Keep difference records for self-referenced entity sequences. 
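+                    # The entity id is instead taken from rcsb_entity_id, which
+                    # __addStructRefSeqEntityIds() pre-populates in struct_ref_seq_dif; rows that
+                    # never matched a struct_ref_seq alignment keep the "?" placeholder, which
+                    # getValueOrDefault() maps to None, so such rows are skipped below.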
+                    # if alignId not in alignEntityMapD and dbName not in excludeRefDbList:
+                    #     logger.warning("%s inconsistent alignment ID %r in difference record %d", dataContainer.getName(), alignId, ii + 1)
+                    #     continue
+                    #
+                    seqId = srsdObj.getValueOrDefault("seq_num", ii, defaultValue=None)
+                    if not seqId:
+                        continue
+                    compId = srsdObj.getValue("mon_id", ii)
+                    #
+                    details = srsdObj.getValue("details", ii)
+                    filteredDetails = self.filterRefSequenceDif(details)
+                    if filteredDetails == "artifact":
+                        try:
+                            entityArtifactD.setdefault(entityId, []).append(int(seqId))
+                            seqIdDetailsD[int(seqId)] = details.lower()
+                        except Exception:
+                            logger.debug("Incomplete sequence difference for %r %r %r %r", dataContainer.getName(), entityId, seqId, details)
+                    else:
+                        seqMonomerFeatureD.setdefault((entityId, seqId, compId, filteredDetails), set()).add(details.lower())
+            #
+            # Consolidate the artifacts as ranges -
+            for entityId, sL in entityArtifactD.items():
+                # logger.debug("%s artifact ranges SL %r ranges %r", dataContainer.getName(), sL, list(self.__toRangeList(sL)))
+                srL = self.__toRangeList(sL)
+                for sr in srL:
+                    seqRangeFeatureD.setdefault((entityId, str(sr[0]), str(sr[1]), "artifact"), set()).update([seqIdDetailsD[sr[0]], seqIdDetailsD[sr[1]]])
+            # JDW
+            # logger.info("%s seqMonomerFeatureD %r ", dataContainer.getName(), seqMonomerFeatureD)
+            #
+            # Tabulate sequence monomer features by entity for the filtered cases -
+            for (entityId, _, _, fDetails), _ in seqMonomerFeatureD.items():
+                if entityId not in seqFeatureCountsD:
+                    seqFeatureCountsD[entityId] = {"mutation": 0, "artifact": 0, "insertion": 0, "deletion": 0, "conflict": 0, "other": 0}
+                seqFeatureCountsD[entityId][fDetails] += 1
+            #
+            #
+            # Tabulate sequence range features by entity for the filtered cases -
+            for (entityId, _, _, fDetails), _ in seqRangeFeatureD.items():
+                if entityId not in seqFeatureCountsD:
+                    seqFeatureCountsD[entityId] = {"mutation": 0, "artifact": 0, "insertion": 0, "deletion": 0, "conflict": 0, "other": 0}
+                seqFeatureCountsD[entityId][fDetails] += 1
+
+            return {
+                "seqFeatureCountsD": seqFeatureCountsD,
+                "seqMonomerFeatureD": seqMonomerFeatureD,
+                "seqRangeFeatureD": seqRangeFeatureD,
+            }
+        except Exception as e:
+            logger.exception("%s failing with %s", dataContainer.getName(), str(e))
+        return rD
+
+    def __addStructRefSeqEntityIds(self, dataContainer):
+        """Add entity ids to the struct_ref_seq and struct_ref_seq_dif category instances.
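+
+        Entity ids are copied from struct_ref to the matching struct_ref_seq rows (joined
+        on ref_id) and then propagated to struct_ref_seq_dif rows (joined on align_id).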
+ + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + atName (str): Attribute name + + Returns: + bool: True for success or False otherwise + + """ + catName = "struct_ref_seq" + try: + logger.debug("Starting with %r %r", dataContainer.getName(), catName) + # + if not (dataContainer.exists(catName) and dataContainer.exists("struct_ref")): + return False + # + atName = "rcsb_entity_id" + srsObj = dataContainer.getObj(catName) + if not srsObj.hasAttribute(atName): + srsObj.appendAttributeExtendRows(atName, defaultValue="?") + else: + # skip if attribute has already been added - + return True + # + srObj = dataContainer.getObj("struct_ref") + # + srsdObj = None + if dataContainer.exists("struct_ref_seq_dif"): + srsdObj = dataContainer.getObj("struct_ref_seq_dif") + if not srsdObj.hasAttribute(atName): + # srsdObj.appendAttribute(atName) + srsdObj.appendAttributeExtendRows(atName, defaultValue="?") + + for ii in range(srObj.getRowCount()): + entityId = srObj.getValue("entity_id", ii) + refId = srObj.getValue("id", ii) + # + # Get indices for the target refId. + iRowL = srsObj.selectIndices(refId, "ref_id") + for iRow in iRowL: + srsObj.setValue(entityId, "rcsb_entity_id", iRow) + alignId = srsObj.getValue("align_id", iRow) + # + if srsdObj: + jRowL = srsdObj.selectIndices(alignId, "align_id") + for jRow in jRowL: + srsdObj.setValue(entityId, "rcsb_entity_id", jRow) + + return True + except Exception as e: + logger.exception("%s %s failing with %s", dataContainer.getName(), catName, str(e)) + return False + + def filterRefSequenceDif(self, details): + filteredDetails = details + if details.upper() in [ + "ACETYLATION", + "CHROMOPHORE", + "VARIANT", + "MODIFIED RESIDUE", + "MODIFIED", + "ENGINEERED", + "ENGINEERED MUTATION", + "AMIDATION", + "FORMYLATION", + "ALLELIC VARIANT", + "AUTOPHOSPHORYLATION", + "BENZOYLATION", + "CHEMICAL MODIFICATION", + "CHEMICALLY MODIFIED", + "CHROMOPHOR, REM 999", + "CHROMOPHORE, REM 999", + "D-CONFIGURATION", + "ENGINEERED AND OXIDIZED CYS", + "ENGINEERED MUTANT", + "ENGINERED MUTATION", + "HYDROXYLATION", + "METHYLATED ASN", + "METHYLATION", + "MICROHETEROGENEITY", + "MODEIFED RESIDUE", + "MODIFICATION", + "MODIFIED AMINO ACID", + "MODIFIED CHROMOPHORE", + "MODIFIED GLN", + "MODIFIED RESIDUES", + "MUTATION", + "MYC EPITOPE", + "MYRISTOYLATED", + "MYRISTOYLATION", + "NATURAL VARIANT", + "NATURAL VARIANTS", + "OXIDIZED CY", + "OXIDIZED CYS", + "PHOSPHORYLATION", + "POLYMORPHIC VARIANT", + "PROPIONATION", + "SOMATIC VARIANT", + "SUBSTITUTION", + "TRNA EDITING", + "TRNA MODIFICATION", + "TRNA", + "VARIANT STRAIN", + "VARIANTS", + ]: + filteredDetails = "mutation" + elif details.upper() in [ + "LEADER SEQUENCE", + "INITIATING METHIONINE", + "INITIATOR METHIONINE", + "LINKER", + "EXPRESSION TAG", + "CLONING", + "CLONING ARTIFACT", + "C-TERM CLONING ARTIFA", + "C-TERMINAL HIS TAG", + "C-TERMINLA HIS-TAG", + "CLONING AETIFACT", + "CLONING ARATIFACT", + "CLONING ARTEFACT", + "CLONING ARTFIACT", + "CLONING ARTIACT", + "CLONING ARTIFACTS", + "CLONING ARTUFACT", + "CLONING ATIFACT", + "CLONING MUTATION", + "CLONING REMNANT", + "CLONING SITE RESIDUE", + "CLONNG ARTIFACT", + "CLONONG ARTIFACT", + "DETECTION TAG", + "ENGINEERED LINKER", + "EXPRESSION ARTIFACT", + "EXPRESSIOPN TAG", + "EXPRSSION TAG", + "FLAG TAG", + "GCN4 TAG", + "GPGS TAG", + "GST TAG", + "HIA TAG", + "HIS TAG", + "HIS-TAG", + "INITIAL METHIONINE", + "INITIATING MET", + "INITIATING METHIONIE", + "INITIATING MSE", + "INITIATING RESIDUE", + "INITIATOR 
N-FORMYL-MET", + "INTIATING METHIONINE", + "INTRACHAIN HIS TAG", + "LINKER INSERTION", + "LINKER PEPTIDE", + "LINKER RESIDUE", + "LINKER SEQUENCE", + "LYS TAG", + "MOD. RESIDUE/CLONING ARTIFACT", + "MYC TAG", + "N-TERMINAL EXTENSION", + "N-TERMINAL HIS TAG", + "PURIFICATION TAG", + "RANDOM MUTAGENESIS", + "RECOMBINANT HIS TAG", + "RESIDUAL LINKER", + "STREP-TAGII", + "T7 EPITOPE TAG", + "T7-TAG", + "TAG", + ]: + filteredDetails = "artifact" + elif details.upper() in ["INSERTION", "ENGINEERED INSERTION", "INSERTED", "INSERTION AT N-TERMINUS"]: + filteredDetails = "insertion" + elif details.upper() in ["DELETION", "CONFLICT/DELETION", "ENGINEERED DELETION"]: + filteredDetails = "deletion" + elif details.upper() in ["CONFLICT", "SEQUENCE CONFLICT", "SEQUENCE CONFLICT8"]: + filteredDetails = "conflict" + else: + logger.debug("Unanticipated sequence difference details %r", details) + filteredDetails = "other" + # + return filteredDetails + + def filterEntityPolyType(self, pType): + """Map input dictionary polymer type to simplified molecular type. + + Args: + pType (str): PDBx/mmCIF dictionary polymer type + + Returns: + str: simplified mappings + + Returns mappings: + 'Protein' 'polypeptide(D) or polypeptide(L)' + 'DNA' 'polydeoxyribonucleotide' + 'RNA' 'polyribonucleotide' + 'NA-hybrid' 'polydeoxyribonucleotide/polyribonucleotide hybrid' + 'Other' 'polysaccharide(D), polysaccharide(L), cyclic-pseudo-peptide, peptide nucleic acid, or other' + """ + polymerType = pType.lower() + if polymerType in ["polypeptide(d)", "polypeptide(l)"]: + rT = "Protein" + elif polymerType in ["polydeoxyribonucleotide"]: + rT = "DNA" + elif polymerType in ["polyribonucleotide"]: + rT = "RNA" + elif polymerType in ["polydeoxyribonucleotide/polyribonucleotide hybrid"]: + rT = "NA-hybrid" + else: + rT = "Other" + return rT + + def guessEntityPolyTypes(self, monomerL): + """Guess the polymer types to from the monomer list. + + Args: + monomerL (list): list of monomers (chemical component ids) + + Returns: + tuple: polymerType, filtered polymer Type. + + Returns mappings: + 'Protein' 'polypeptide(D) or polypeptide(L)' + 'DNA' 'polydeoxyribonucleotide' + 'RNA' 'polyribonucleotide' + 'NA-hybrid' 'polydeoxyribonucleotide/polyribonucleotide hybrid' + 'Other' 'polysaccharide(D), polysaccharide(L), cyclic-pseudo-peptide, peptide nucleic acid, or other' + """ + hasAA = hasDNA = hasRNA = False + pType = fpType = None + for monomer in monomerL: + if monomer in DictMethodCommonUtils.aaDict3: + hasAA = True + elif monomer in DictMethodCommonUtils.dnaDict3: + hasDNA = True + elif monomer in DictMethodCommonUtils.rnaDict3: + hasRNA = True + # + if hasAA and not hasDNA and not hasRNA: + pType = "polypeptide(d)" + elif hasDNA and not hasAA and not hasRNA: + pType = "polydeoxyribonucleotide" + elif hasRNA and not hasAA and not hasDNA: + pType = "polyribonucleotide" + elif not hasAA and hasDNA and hasRNA: + pType = "polydeoxyribonucleotide/polyribonucleotide hybrid" + + if pType: + fpType = self.filterEntityPolyType(pType) + else: + pType = None + fpType = "Other" + # + return pType, fpType + + def getPolymerComposition(self, polymerTypeList): + """Map in list of dictionary entity polymer/branched types to a composition string. + Input polymerTypeList contains entity_poly.type and pdbx_entity_branch.type values. 
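+
+        For example (illustrative, derived from the branching below):
+            ['polypeptide(L)', 'polyribonucleotide'] -> ('protein/NA', 'Protein/NA', 'Other', {'protein': 1, 'RNA': 1})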
+ + Args: + polymerTypeList (list): List of PDBx/mmCIF dictionary polymer/branched types + + Returns: + tuple: compClass, ptClass, naClass, cD + + compClass - simplified composition string + ptClass - subset class + naClass - nucleic acid subset class + cD (dict) - composition type counts + + Current polymer type list: + 'polypeptide(D)' + 'polypeptide(L)' + 'polydeoxyribonucleotide' + 'polyribonucleotide' + 'polysaccharide(D)' + 'polysaccharide(L)' + 'polydeoxyribonucleotide/polyribonucleotide hybrid' + 'cyclic-pseudo-peptide' + 'peptide nucleic acid' + 'other' + "other type pair (polymer type count = 2)" + "other composition (polymer type count >= 3)" + + Current branch type list: + 'oligosaccharide' + + Output composition classes: + + 'homomeric protein' 'single protein entity' + 'heteromeric protein' 'multiple protein entities' + 'DNA' 'DNA entity/entities only' + 'RNA' 'RNA entity/entities only' + 'NA-hybrid' 'DNA/RNA hybrid entity/entities only' + 'protein/NA' 'Both protein and nucleic acid polymer entities' + 'DNA/RNA' 'Both DNA and RNA polymer entities' + 'oligosaccharide' 'One of more oligosaccharide entities' + 'protein/oligosaccharide' 'Both protein and oligosaccharide entities' + 'NA/oligosaccharide' 'Both NA and oligosaccharide entities' + 'other' 'Neither an individual protein, nucleic acid polymer nor oligosaccharide entity' + 'other type pair' 'Other combinations of 2 polymer types' + 'other type composition' 'Other combinations of 3 or more polymer types' + + And selected types (ptClass)- + 'Protein (only)' 'protein entity/entities only' + 'Nucleic acid (only)' 'DNA, RNA or NA-hybrid entity/entities only' + 'Protein/NA' 'Both protein and nucleic acid (DNA, RNA, or NA-hybrid) polymer entities' + 'Other' 'Another polymer type composition' + + And selected NA types (naClass) - + 'DNA (only)' 'DNA entity/entities only' + 'RNA (only)' 'RNA entity/entities only' + 'NA-hybrid (only)' 'NA-hybrid entity/entities only' + 'DNA/RNA (only)' 'Both DNA and RNA polymer entities only' + 'Other' 'Another polymer type composition' + """ + + compClass = "other" + # get type counts + cD = {} + for polymerType in polymerTypeList: + if polymerType in ["polypeptide(D)", "polypeptide(L)"]: + cD["protein"] = cD["protein"] + 1 if "protein" in cD else 1 + elif polymerType in ["polydeoxyribonucleotide"]: + cD["DNA"] = cD["DNA"] + 1 if "DNA" in cD else 1 + elif polymerType in ["polyribonucleotide"]: + cD["RNA"] = cD["RNA"] + 1 if "RNA" in cD else 1 + elif polymerType in ["polydeoxyribonucleotide/polyribonucleotide hybrid"]: + cD["NA-hybrid"] = cD["NA-hybrid"] + 1 if "NA-hybrid" in cD else 1 + elif polymerType in ["oligosaccharide"]: + cD["oligosaccharide"] = cD["oligosaccharide"] + 1 if "oligosaccharide" in cD else 1 + else: + cD["other"] = cD["other"] + 1 if "other" in cD else 1 + # + if len(cD) == 1: + ky = list(cD.keys())[0] + if "protein" in cD: + if cD["protein"] == 1: + compClass = "homomeric protein" + else: + compClass = "heteromeric protein" + elif ky in ["DNA", "RNA", "NA-hybrid", "oligosaccharide", "other"]: + compClass = ky + elif len(cD) == 2: + if "protein" in cD: + if ("DNA" in cD) or ("RNA" in cD) or ("NA-hybrid" in cD): + compClass = "protein/NA" + elif "oligosaccharide" in cD: + compClass = "protein/oligosaccharide" + elif "DNA" in cD and "RNA" in cD: + compClass = "DNA/RNA" + elif "oligosaccharide" in cD and ("RNA" in cD or "DNA" in cD): + compClass = "NA/oligosaccharide" + else: + compClass = "other type pair" + elif len(cD) == 3: + if "DNA" in cD and "RNA" in cD and "NA-hybrid" in 
cD: + compClass = "DNA/RNA" + elif "oligosaccharide" in cD and all([j in ["oligosaccharide", "DNA", "RNA", "NA-hybrid"] for j in cD]): + compClass = "NA/oligosaccharide" + elif "protein" in cD and all([j in ["protein", "DNA", "RNA", "NA-hybrid"] for j in cD]): + compClass = "protein/NA" + elif "oligosaccharide" in cD and "protein" in cD and all([j in ["protein", "oligosaccharide", "DNA", "RNA", "NA-hybrid"] for j in cD]): + compClass = "protein/NA/oligosaccharide" + else: + compClass = "other type composition" + elif len(cD) >= 4: + if "oligosaccharide" in cD and all([j in ["oligosaccharide", "DNA", "RNA", "NA-hybrid"] for j in cD]): + compClass = "NA/oligosaccharide" + elif "protein" in cD and all([j in ["protein", "DNA", "RNA", "NA-hybrid"] for j in cD]): + compClass = "protein/NA" + elif "oligosaccharide" in cD and "protein" in cD and all([j in ["protein", "oligosaccharide", "DNA", "RNA", "NA-hybrid"] for j in cD]): + compClass = "protein/NA/oligosaccharide" + else: + compClass = "other type composition" + else: + compClass = "other type composition" + + # Subset type class -- + # + if compClass in ["homomeric protein", "heteromeric protein"]: + ptClass = "Protein (only)" + elif compClass in ["DNA", "RNA", "NA-hybrid", "DNA/RNA"]: + ptClass = "Nucleic acid (only)" + elif compClass in ["protein/NA"]: + ptClass = "Protein/NA" + # JDW + elif compClass in ["protein/oligosaccharide"]: + ptClass = "Protein/Oligosaccharide" + elif compClass in ["oligosaccharide"]: + ptClass = "Oligosaccharide (only)" + # elif compClass in ["protein/NA/oligosaccharide"]: + # ptClass = "Protein/NA/Oligosaccharide" + # JDW + else: + ptClass = "Other" + # + # NA subtype class --- + # + if compClass in ["DNA"]: + naClass = "DNA (only)" + elif compClass in ["RNA"]: + naClass = "RNA (only)" + elif compClass in ["NA-hybrid"]: + naClass = "NA-hybrid (only)" + elif compClass in ["DNA/RNA"]: + naClass = "DNA/RNA (only)" + else: + naClass = "Other" + # + return compClass, ptClass, naClass, cD + + def filterExperimentalMethod(self, methodL): + """Apply a standard filter to the input experimental method list returning a method count and + a simplified method name. 
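+
+        For example (illustrative):
+            ["SOLUTION NMR"]                             -> (1, "NMR")
+            ["X-RAY DIFFRACTION", "NEUTRON DIFFRACTION"] -> (2, "Multiple methods")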
+
+        Args:
+            methodL (list): List of dictionary compliant experimental method names
+
+        Returns:
+            tuple(int,str): methodCount, simpleMethodName
+
+        For example:
+            'X-ray'            'X-RAY DIFFRACTION, FIBER DIFFRACTION, or POWDER DIFFRACTION'
+            'NMR'              'SOLUTION NMR or SOLID-STATE NMR'
+            'EM'               'ELECTRON MICROSCOPY, ELECTRON CRYSTALLOGRAPHY, ELECTRON DIFFRACTION, CRYO-ELECTRON MICROSCOPY or ELECTRON TOMOGRAPHY'
+            'Neutron'          'NEUTRON DIFFRACTION'
+            'Multiple methods' 'Multiple experimental methods'
+            'Other'            'SOLUTION SCATTERING, EPR, THEORETICAL MODEL, INFRARED SPECTROSCOPY or FLUORESCENCE TRANSFER'
+        """
+        methodCount = len(methodL)
+        if methodCount > 1:
+            expMethod = "Multiple methods"
+        else:
+            #
+            mS = methodL[0].upper()
+            expMethod = "Other"
+            if mS in ["X-RAY DIFFRACTION", "FIBER DIFFRACTION", "POWDER DIFFRACTION"]:
+                expMethod = "X-ray"
+            elif mS in ["SOLUTION NMR", "SOLID-STATE NMR"]:
+                expMethod = "NMR"
+            elif mS in ["ELECTRON MICROSCOPY", "ELECTRON CRYSTALLOGRAPHY", "ELECTRON DIFFRACTION", "CRYO-ELECTRON MICROSCOPY", "ELECTRON TOMOGRAPHY"]:
+                expMethod = "EM"
+            elif mS in ["NEUTRON DIFFRACTION"]:
+                expMethod = "Neutron"
+            elif mS in ["SOLUTION SCATTERING", "EPR", "THEORETICAL MODEL", "INFRARED SPECTROSCOPY", "FLUORESCENCE TRANSFER"]:
+                expMethod = "Other"
+            else:
+                logger.error("Unexpected experimental method %r", mS)
+
+        return methodCount, expMethod
+
+    def hasMethodNMR(self, methodL):
+        """Return True if the input dictionary experimental method list contains an NMR experimental method.
+
+        Args:
+            methodL (list): List of dictionary experimental method names
+
+        Returns:
+            bool: True if the input contains NMR or False otherwise
+        """
+        ok = False
+        for method in methodL:
+            if method in ["SOLUTION NMR", "SOLID-STATE NMR"]:
+                return True
+        return ok
+
+    def __getTimeStamp(self):
+        utcnow = datetime.datetime.utcnow()
+        ts = utcnow.strftime("%Y-%m-%d:%H:%M:%S")
+        return ts
+
+    def __stripWhiteSpace(self, val):
+        """Remove all white space from the input value."""
+        if val is None:
+            return val
+        return self.__wsPattern.sub("", val)
+
+    def __toRangeList(self, iterable):
+        """Yield contiguous (begin, end) ranges from the input collection of integers."""
+        iterable = sorted(set(iterable))
+        for _, group in itertools.groupby(enumerate(iterable), lambda t: t[1] - t[0]):
+            group = list(group)
+            yield group[0][1], group[-1][1]
+
+    #
+    def getTargetSiteInfo(self, dataContainer):
+        """Return a dictionary of target site binding interactions using standard nomenclature.
+
+        Args:
+            dataContainer (object): mmcif.api.DataContainer object instance
+
+        Returns:
+            dict: {site_id: [{'asymId': , 'compId': , 'seqId': }, ...], ... }
+
+        """
+        if not dataContainer or not dataContainer.getName():
+            return {}
+        wD = self.__fetchInstanceSiteInfo(dataContainer)
+        return wD["targetSiteD"] if "targetSiteD" in wD else {}
+
+    def getLigandSiteInfo(self, dataContainer):
+        """Return a dictionary of ligand site binding interactions.
+
+        Args:
+            dataContainer (object): mmcif.api.DataContainer object instance
+
+        Returns:
+            dict: {site_id: {"evCode": software|author,
+                             "fromDetails": True|False,
+                             "isRaw": True|False,
+                             "entityType": polymer|non-polymer,
+                             "polymerLigand": {"asymId": ., "entityId": ., "begSeqId": ., "endSeqId":.
}, + "nonPolymerLigands": [{"asymId": ., "entityId": ., "compId": .}, ...], + "description": raw or generated text, + } + } + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchInstanceSiteInfo(dataContainer) + return wD["ligandSiteD"] if "ligandSiteD" in wD else {} + + def __fetchInstanceSiteInfo(self, dataContainer): + wD = self.__instanceSiteInfoCache.get(dataContainer.getName()) + if not wD: + wD = self.__getInstanceSiteInfo(dataContainer) + self.__instanceSiteInfoCache.set(dataContainer.getName(), wD) + return wD + + def __getInstanceSiteInfo(self, dataContainer): + """[summary] + + Args: + dataContainer (object): mmif.api.DataContainer object instance + + Returns: + dict : {"targetSiteD" = {: {}} + "ligandSiteD": {: {}} + } + + For example: + + loop_ + _struct_site.id + _struct_site.pdbx_evidence_code + _struct_site.pdbx_auth_asym_id + _struct_site.pdbx_auth_comp_id + _struct_site.pdbx_auth_seq_id + _struct_site.pdbx_auth_ins_code # never used + _struct_site.pdbx_num_residues + _struct_site.details + AC1 Software ? ? ? ? 7 'BINDING SITE FOR RESIDUE ADP A 105' + AC2 Software ? ? ? ? 16 'BINDING SITE FOR RESIDUE ADP B 101' + AC3 Software ? ? ? ? 6 'BINDING SITE FOR RESIDUE MG B 66' + AC4 Software ? ? ? ? 13 'BINDING SITE FOR RESIDUE ADP C 102' + AC5 Software ? ? ? ? 16 'BINDING SITE FOR RESIDUE ADP E 103' + AC6 Software ? ? ? ? 10 'BINDING SITE FOR RESIDUE ADP F 104' + AC7 Software ? ? ? ? 6 'BINDING SITE FOR RESIDUE MG K 9' + # + loop_ + _struct_site_gen.id + _struct_site_gen.site_id + _struct_site_gen.pdbx_num_res + _struct_site_gen.label_comp_id + _struct_site_gen.label_asym_id + _struct_site_gen.label_seq_id + _struct_site_gen.pdbx_auth_ins_code + _struct_site_gen.auth_comp_id + _struct_site_gen.auth_asym_id + _struct_site_gen.auth_seq_id + _struct_site_gen.label_atom_id + _struct_site_gen.label_alt_id + _struct_site_gen.symmetry + _struct_site_gen.details + 1 AC1 7 TYR A 25 ? TYR A 25 . ? 1_555 ? + 2 AC1 7 GLY A 29 ? GLY A 29 . ? 1_555 ? + 3 AC1 7 THR A 61 ? THR A 61 . ? 1_555 ? + 4 AC1 7 VAL A 63 ? VAL A 63 . ? 1_555 ? + 5 AC1 7 ILE B 30 ? ILE B 30 . ? 1_555 ? + 6 AC1 7 LEU B 32 ? LEU B 32 . ? 1_555 ? + 7 AC1 7 GLN B 52 ? GLN B 52 . ? 1_555 ? + 8 AC2 16 TYR B 25 ? TYR B 25 . ? 1_555 ? + 9 AC2 16 LEU B 26 ? LEU B 26 . ? 1_555 ? + 10 AC2 16 GLY B 29 ? GLY B 29 . ? 1_555 ? + 11 AC2 16 LYS B 31 ? LYS B 31 . ? 1_555 ? + 12 AC2 16 SER B 60 ? SER B 60 . ? 1_555 ? + 13 AC2 16 THR B 61 ? THR B 61 . ? 1_555 ? + 14 AC2 16 HOH P . ? HOH B 113 . ? 1_555 ? + 15 AC2 16 HOH P . ? HOH B 116 . ? 1_555 ? + 16 AC2 16 HOH P . ? HOH B 201 . ? 1_555 ? + 17 AC2 16 HOH P . ? HOH B 241 . ? 1_555 ? + 18 AC2 16 LEU C 26 ? LEU C 26 . ? 1_555 ? + 19 AC2 16 ASN C 28 ? ASN C 28 . ? 1_555 ? + 20 AC2 16 ILE C 30 ? ILE C 30 . ? 1_555 ? + 21 AC2 16 LEU C 32 ? LEU C 32 . ? 1_555 ? + 22 AC2 16 ARG F 16 ? ARG F 16 . ? 1_565 ? + 23 AC2 16 ARG F 17 ? ARG F 17 . ? 1_565 ? 
+ """ + logger.debug("Starting with %r", dataContainer.getName()) + # + rD = {"targetSiteD": {}, "ligandSiteD": {}} + try: + # Exit if source categories are missing + if not (dataContainer.exists("struct_site") and dataContainer.exists("struct_site_gen")): + return rD + # ------- --------- ------- --------- ------- --------- ------- --------- ------- --------- + ssObj = None + if dataContainer.exists("struct_site"): + ssObj = dataContainer.getObj("struct_site") + # + ssgObj = None + if dataContainer.exists("struct_site_gen"): + ssgObj = dataContainer.getObj("struct_site_gen") + + # + ligandSiteD = {} + for ii in range(ssObj.getRowCount()): + ligL = [] + evCode = str(ssObj.getValue("pdbx_evidence_code", ii)).lower() + if evCode not in ["software", "author"]: + continue + sId = ssObj.getValue("id", ii) + authAsymId = ssObj.getValueOrDefault("pdbx_auth_asym_id", ii, defaultValue=None) + compId = ssObj.getValueOrDefault("pdbx_auth_comp_id", ii, defaultValue=None) + authSeqId = ssObj.getValueOrDefault("pdbx_auth_seq_id", ii, defaultValue=None) + ssDetails = ssObj.getValueOrDefault("details", ii, defaultValue=None) + fromDetails = False + if authAsymId: + ligL.append((authAsymId, compId, authSeqId, ssDetails)) + else: + fromDetails = True + if evCode == "software": + ligL = self.__parseStructSiteLigandDetails(ssDetails) + elif evCode == "author": + ligL.append((None, None, None, ssDetails)) + # + ligandSiteD[sId] = self.__transStructSiteLigandDetails(dataContainer, ligL, evCode=evCode, fromDetails=fromDetails) + # + + targetSiteD = {} + instTypeD = self.getInstanceTypes(dataContainer) + for ii in range(ssgObj.getRowCount()): + sId = ssgObj.getValue("site_id", ii) + asymId = ssgObj.getValueOrDefault("label_asym_id", ii, defaultValue=None) + compId = ssgObj.getValueOrDefault("label_comp_id", ii, defaultValue=None) + seqId = ssgObj.getValueOrDefault("label_seq_id", ii, defaultValue=None) + # + if asymId and compId and seqId and asymId in instTypeD and instTypeD[asymId] == "polymer": + targetSiteD.setdefault(sId, []).append({"asymId": asymId, "compId": compId, "seqId": seqId}) + # + return {"targetSiteD": targetSiteD, "ligandSiteD": ligandSiteD} + except Exception as e: + logger.exception("%s failing with %s", dataContainer.getName(), str(e)) + return rD + + def __transStructSiteLigandDetails(self, dataContainer, ligL, evCode="software", fromDetails=True): + """Convert struct_site ligand details to standard nomenclature. + + Args: + dataContainer (object): mmif.api.DataContainer object instance + ligL (list): list of raw ligand details in author nomenclature + evCode (str): string (software|author) + fromDetails (bool, optional): details parsed from descriptive text. Defaults to True. + + Returns: + dict: {"evCode": software|author, + "fromDetails": True|False, + "isRaw": True|False, + "entityType": polymer|non-polymer, + "polymerLigand": {"asymId": ., "entityId": ., "begSeqId": ., "endSeqId":. 
}, + "nonPolymerLigands": [{"asymId": ., "entityId": ., "compId": .}, ...], + "description": raw or generated text, + "siteLabel": replacement for data site id, + } + + """ + rD = { + "evCode": evCode, + "fromDetails": fromDetails, + "isRaw": True, + "entityType": None, + "polymerLigand": None, + "nonPolymerLigands": None, + "description": None, + "siteLabel": None, + } + npAuthAsymD = self.getNonPolymerIdMap(dataContainer) + pAuthAsymD = self.getPolymerIdMap(dataContainer) + asymAuthIdD = self.getAsymAuthIdMap(dataContainer) + asymIdPolymerRangesD = self.getInstancePolymerRanges(dataContainer) + iTypeD = self.getInstanceTypes(dataContainer) + asymAuthIdD = self.getAsymAuthIdMap(dataContainer) + # Note that this is a non-unique index inversion + authAsymD = {v: k for k, v in asymAuthIdD.items()} + instEntityD = self.getInstanceEntityMap(dataContainer) + evS = "Software generated" if evCode == "software" else "Author provided" + # + if len(ligL) == 1: + authAsymId, compId, authSeqId, ssDetails = ligL[0] + # + if not authAsymId: + rD["description"] = ssDetails + rD["isRaw"] = True + elif not authSeqId: + # An unqualified authAsymId - + asymId = authAsymD[authAsymId] if authAsymId in authAsymD else None + entityId = instEntityD[asymId] if asymId in instEntityD else None + if entityId and asymId and asymId in iTypeD and iTypeD[asymId] == "polymer" and asymId in asymIdPolymerRangesD: + # insert the full residue range - + rD["entityType"] = iTypeD[asymId] + begSeqId = asymIdPolymerRangesD[asymId]["begSeqId"] + endSeqId = asymIdPolymerRangesD[asymId]["endSeqId"] + tD = {"asymId": asymId, "entityId": instEntityD[asymId], "begSeqId": begSeqId, "endSeqId": endSeqId} + rD["description"] = "%s binding site for entity %s (%s-%s) instance %s chain %s" % (evS, entityId, begSeqId, endSeqId, asymId, authAsymId) + rD["polymerLigand"] = tD + rD["siteLabel"] = "chain %s" % authAsymId + elif (authAsymId, authSeqId) in npAuthAsymD: + # single non-polymer-ligand - + asymId = npAuthAsymD[(authAsymId, authSeqId)]["asym_id"] + rD["entityType"] = iTypeD[asymId] + entityId = instEntityD[asymId] + tD = {"asymId": asymId, "entityId": instEntityD[asymId], "compId": compId} + rD["nonPolymerLigands"] = [tD] + rD["description"] = "%s binding site for ligand entity %s component %s instance %s chain %s" % (evS, entityId, compId, asymId, authAsymId) + rD["siteLabel"] = "ligand %s" % compId + elif (authAsymId, authSeqId, None) in pAuthAsymD: + # single monomer ligand - an odd case + asymId = pAuthAsymD[(authAsymId, authSeqId, None)]["asym_id"] + entityId = pAuthAsymD[(authAsymId, authSeqId, None)]["entity_id"] + seqId = pAuthAsymD[(authAsymId, authSeqId, None)]["seq_id"] + rD["entityType"] = iTypeD[asymId] + tD = {"asymId": asymId, "entityId": entityId, "begSeqId": seqId, "endSeqId": seqId} + rD["description"] = "%s binding site for entity %s instance %s chainId %s (%s)" % (evS, entityId, asymId, authAsymId, authSeqId) + rD["polymerLigand"] = tD + rD["siteLabel"] = "chain %s" % authAsymId + else: + logger.debug("%s untranslated single ligand details %r", dataContainer.getName(), ligL) + logger.debug("npAuthAsymD %r", npAuthAsymD) + rD["description"] = ssDetails + rD["isRaw"] = True + # + elif len(ligL) == 2: + authAsymIdA, compIdA, authSeqIdA, ssDetailsA = ligL[0] + authAsymIdB, compIdB, authSeqIdB, _ = ligL[1] + # + # is np + if (authAsymIdA, authSeqIdA) in npAuthAsymD and (authAsymIdB, authSeqIdB) in npAuthAsymD: + asymIdA = npAuthAsymD[(authAsymIdA, authSeqIdA)]["asym_id"] + entityIdA = npAuthAsymD[(authAsymIdA, 
authSeqIdA)]["entity_id"] + asymIdB = npAuthAsymD[(authAsymIdB, authSeqIdB)]["asym_id"] + entityIdB = npAuthAsymD[(authAsymIdB, authSeqIdB)]["entity_id"] + tDA = {"asymId": asymIdA, "entityId": entityIdA, "compId": compIdA} + tDB = {"asymId": asymIdB, "entityId": entityIdB, "compId": compIdB} + rD["nonPolymerLigands"] = [tDA, tDB] + rD["entityType"] = iTypeD[asymIdA] + rD["description"] = "%s binding site for ligands: entity %s component %s instance %s chain %s and entity %s component %s instance %s chain %s" % ( + evS, + entityIdA, + compIdA, + asymIdA, + authAsymIdA, + entityIdB, + compIdB, + asymIdB, + authAsymIdB, + ) + rD["siteLabel"] = "ligands %s/%s" % (compIdA, compIdB) + elif (authAsymIdA, authSeqIdA, None) in pAuthAsymD and (authAsymIdB, authSeqIdB, None) in pAuthAsymD and authAsymIdA == authAsymIdB: + asymIdA = pAuthAsymD[(authAsymIdA, authSeqIdA, None)]["asym_id"] + entityIdA = pAuthAsymD[(authAsymIdA, authSeqIdA, None)]["entity_id"] + asymIdB = pAuthAsymD[(authAsymIdB, authSeqIdB, None)]["asym_id"] + entityIdB = pAuthAsymD[(authAsymIdB, authSeqIdB, None)]["entity_id"] + begSeqId = pAuthAsymD[(authAsymIdA, authSeqIdA, None)]["seq_id"] + endSeqId = pAuthAsymD[(authAsymIdB, authSeqIdB, None)]["seq_id"] + tD = {"asymId": asymIdA, "entityId": instEntityD[asymIdA], "begSeqId": begSeqId, "endSeqId": endSeqId} + rD["entityType"] = iTypeD[asymIdA] + rD["description"] = "%s binding site for entity %s instance %s chain %s and entity %s instance %s chain %s" % ( + evS, + entityIdA, + asymIdA, + authAsymIdA, + entityIdB, + asymIdB, + authAsymIdB, + ) + rD["polymerLigand"] = tD + rD["siteLabel"] = "chains %s/%s" % (authAsymIdA, authAsymIdB) + else: + logger.debug("%s untranslated ligand details %r", dataContainer.getName(), ligL) + rD["description"] = ssDetailsA + rD["isRaw"] = True + else: + logger.error("%s unexpected ligand expression %r", dataContainer.getName(), ligL) + return rD + + def __parseStructSiteLigandDetails(self, ssDetails): + """Parse the input site description text and returning structured details + where possible. + + Args: + ssDetails (str): struct_site.details text + + Returns: + list: [(authAsymId, compId, authSeqId, ssDetails), ... 
] + + """ + retL = [] + # + try: + if not ssDetails: + retL.append((None, None, None, None)) + return retL + prefixL = [ + "BINDING SITE FOR RESIDUE ", + "binding site for residue ", + "Binding site for Ligand ", + "binding site for Ligand ", + "Binding site for Mono-Saccharide ", + "BINDING SITE FOR MONO-SACCHARIDE ", + "binding site for Mono-Saccharide ", + "binding site for Poly-Saccharide ", + "binding site for nucleotide ", + ] + for prefix in prefixL: + tup = ssDetails.partition(prefix) + if tup[1] == prefix: + ff = tup[2].split(" ") + # binding site for Ligand residues POL d 4 through N7P d 1 bound to THR b 1 + if ff[0] == "residues" and len(ff) > 8 and ff[4].lower() == "through": + compIdA = ff[1] + authAsymIdA = ff[2] + authSeqIdA = ff[3] + retL.append((authAsymIdA, compIdA, authSeqIdA, ssDetails)) + # + compIdB = ff[5] + authAsymIdB = ff[6] + authSeqIdB = ff[7] + retL.append((authAsymIdB, compIdB, authSeqIdB, ssDetails)) + return retL + elif len(ff) == 2: + compId = ff[0] + authAsymId = ff[1][0] + authSeqId = ff[1][1:] + retL.append((authAsymId, compId, authSeqId, ssDetails)) + return retL + elif len(ff) == 3: + compId = ff[0] + authAsymId = ff[1] + authSeqId = ff[2] + retL.append((authAsymId, compId, authSeqId, ssDetails)) + return retL + + # + # Binding site for residues GCD A 900 and NGA A 901 + # Binding site for residues FUC A1118 and BGC A1119' + prefixL = [ + "Binding site for residues ", + "binding site for residues ", + "BINDING SITE FOR DI-SACCHARIDE ", + "Binding site for Di-Saccharide ", + "binding site for Di-Saccharide ", + "binding site for Di-peptide ", + "Binding site for Di-peptide ", + "binding site for Di-nucleotide ", + ] + for prefix in prefixL: + tup = ssDetails.partition(prefix) + if tup[1] == prefix: + ff = tup[2].split(" ") + if len(ff) == 5: + compIdA = ff[0] + authAsymIdA = ff[1][0] + authSeqIdA = ff[1][1:] + compIdB = ff[3] + authAsymIdB = ff[4][0] + authSeqIdB = ff[4][1:] + elif len(ff) == 7: + compIdA = ff[0] + authAsymIdA = ff[1] + authSeqIdA = ff[2] + compIdB = ff[4] + authAsymIdB = ff[5] + authSeqIdB = ff[6] + else: + compIdA = authAsymIdA = authSeqIdA = compIdB = authAsymIdB = authSeqIdB = None + + retL.append((authAsymIdA, compIdA, authSeqIdA, ssDetails)) + retL.append((authAsymIdB, compIdB, authSeqIdB, ssDetails)) + return retL + # + # BINDING SITE FOR LINKED RESIDUES A 1519 A 1520 A 1521 A 1522 A 1523 A 1524 A 1525 + # BINDING SITE FOR LINKED RESIDUES A 801 to 802 + prefixL = ["BINDING SITE FOR LINKED RESIDUES "] + for prefix in prefixL: + tup = ssDetails.partition(prefix) + if tup[1] == prefix: + ff = tup[2].split(" ") + if len(ff) == 2: + # BINDING SITE FOR LINKED RESIDUES A 502-507 + try: + tff = ff[1].split("-") + authAsymIdA = ff[0] + authSeqIdA = tff[0] + authSeqIdB = tff[1] + except Exception: + continue + if len(ff) == 4 and ff[2].lower() == "to": + authAsymIdA = ff[0] + authSeqIdA = ff[1] + authSeqIdB = ff[3] + elif len(ff) == 4 and ff[2].lower() != "to": + authAsymIdA = ff[0] + authSeqIdA = ff[1] + authSeqIdB = ff[3] + elif len(ff) > 4: + authAsymIdA = ff[0] + authSeqIdA = ff[1] + authSeqIdB = ff[-1] + else: + continue + retL.append((authAsymIdA, None, authSeqIdA, ssDetails)) + retL.append((authAsymIdA, None, authSeqIdB, ssDetails)) + return retL + + # + # + prefixL = ["BINDING SITE FOR CHAIN ", "binding site for chain "] + for prefix in prefixL: + tup = ssDetails.partition(prefix) + if tup[1] == prefix: + ff = tup[2].split(" ") + authAsymId = ff[0] + retL.append((authAsymId, None, None, ssDetails)) + return retL + # punt - + 
+            retL.append((None, None, None, ssDetails))
+            return retL
+        except Exception as e:
+            logger.exception("Failing with %s for %r", str(e), ssDetails)
+        return [(None, None, None, ssDetails)]
+
+    def getUnobservedPolymerResidueInfo(self, dataContainer):
+        """Return a dictionary of unobserved regions of polymer instances.
+
+        Args:
+            dataContainer (object): mmcif.api.DataContainer object instance
+
+        Returns:
+            dict: {(modelId, asymId, occFlag): [seqId range list], ...}
+        """
+        if not dataContainer or not dataContainer.getName():
+            return {}
+        wD = self.__fetchUnobservedInfo(dataContainer)
+        return wD["polyResRng"] if "polyResRng" in wD else {}
+
+    def getUnobservedPolymerAtomInfo(self, dataContainer):
+        """Return a dictionary of polymer regions containing unobserved atoms.
+
+        Args:
+            dataContainer (object): mmcif.api.DataContainer object instance
+
+        Returns:
+            dict: {(modelId, asymId, occFlag): [seqId range list], ...}
+        """
+        if not dataContainer or not dataContainer.getName():
+            return {}
+        wD = self.__fetchUnobservedInfo(dataContainer)
+        return wD["polyAtomRng"] if "polyAtomRng" in wD else {}
+
+    def getUnobservedNonPolymerAtomInfo(self, dataContainer):
+        """Return a dictionary of nonpolymer instances containing unobserved atoms (std nomenclature).
+
+        Args:
+            dataContainer (object): mmcif.api.DataContainer object instance
+
+        Returns:
+            dict: {(modelId, compId, asymId, occFlag): [atomId, .. ], ...}
+
+        """
+        if not dataContainer or not dataContainer.getName():
+            return {}
+        wD = self.__fetchUnobservedInfo(dataContainer)
+        return wD["nonPolyMissingAtomD"] if "nonPolyMissingAtomD" in wD else {}
+
+    def getUnobservedNonPolymerAtomInfoAuth(self, dataContainer):
+        """Return a dictionary of nonpolymer instances containing unobserved atoms (auth nomenclature).
+
+        Args:
+            dataContainer (object): mmcif.api.DataContainer object instance
+
+        Returns:
+            dict: {(modelId, compId, authAsymId, authSeqId, occFlag): [atomId, .. ], ...}
+
+        """
+        if not dataContainer or not dataContainer.getName():
+            return {}
+        wD = self.__fetchUnobservedInfo(dataContainer)
+        return wD["nonPolyMissingAtomAuthD"] if "nonPolyMissingAtomAuthD" in wD else {}
+
+    def __fetchUnobservedInfo(self, dataContainer):
+        wD = self.__instanceUnobservedCache.get(dataContainer.getName())
+        if not wD:
+            wD = self.__getUnobserved(dataContainer)
+            self.__instanceUnobservedCache.set(dataContainer.getName(), wD)
+        return wD
+
+    def __getUnobserved(self, dataContainer):
+        """Internal method to extract unobserved and zero occupancy features.
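+
+        For the example rows below (illustrative), unobserved residues MET 1, ALA 2, and LYS 3
+        of instance A in model 1 consolidate to {("1", "A", False): [(1, 3)]} under "polyResRng"
+        (occupancy_flag 1, so the zero-occupancy flag is False).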
+ + Args: + dataContainer ([type]): [description] + + Returns: + {"polyResRng": {(modelId, asymId, occFlag): [seqId range list], ...}, + "polyAtomRng": {(modelId, asymId, occFlag): [seqId range list], ...}, + "nonPolyMissingAtomD": {(modelId, compId, asymId, zeroOccFlag): [atomId,...], }, + "nonPolyMissingAtomAuthD": {(modelId, compId, authAsymId, authSeqId, zeroOccFlag): [atomId,...], }, + } + + occFlag = 0 -> zero occupancy + Example: + + loop_ + _pdbx_unobs_or_zero_occ_atoms.id + _pdbx_unobs_or_zero_occ_atoms.PDB_model_num + _pdbx_unobs_or_zero_occ_atoms.polymer_flag + _pdbx_unobs_or_zero_occ_atoms.occupancy_flag + _pdbx_unobs_or_zero_occ_atoms.auth_asym_id + _pdbx_unobs_or_zero_occ_atoms.auth_comp_id + _pdbx_unobs_or_zero_occ_atoms.auth_seq_id + _pdbx_unobs_or_zero_occ_atoms.PDB_ins_code + _pdbx_unobs_or_zero_occ_atoms.auth_atom_id + _pdbx_unobs_or_zero_occ_atoms.label_alt_id + _pdbx_unobs_or_zero_occ_atoms.label_asym_id + _pdbx_unobs_or_zero_occ_atoms.label_comp_id + _pdbx_unobs_or_zero_occ_atoms.label_seq_id + _pdbx_unobs_or_zero_occ_atoms.label_atom_id + 1 1 Y 1 B ARG 17 ? NE ? B ARG 17 NE + 2 1 Y 1 B ARG 17 ? CZ ? B ARG 17 CZ + 3 1 Y 1 B ARG 17 ? NH1 ? B ARG 17 NH1 + + # + loop_ + _pdbx_unobs_or_zero_occ_residues.id + _pdbx_unobs_or_zero_occ_residues.PDB_model_num + _pdbx_unobs_or_zero_occ_residues.polymer_flag + _pdbx_unobs_or_zero_occ_residues.occupancy_flag + _pdbx_unobs_or_zero_occ_residues.auth_asym_id + _pdbx_unobs_or_zero_occ_residues.auth_comp_id + _pdbx_unobs_or_zero_occ_residues.auth_seq_id + _pdbx_unobs_or_zero_occ_residues.PDB_ins_code + _pdbx_unobs_or_zero_occ_residues.label_asym_id + _pdbx_unobs_or_zero_occ_residues.label_comp_id + _pdbx_unobs_or_zero_occ_residues.label_seq_id + 1 1 Y 1 A MET 1 ? A MET 1 + 2 1 Y 1 A ALA 2 ? A ALA 2 + 3 1 Y 1 A LYS 3 ? 
A LYS 3 + """ + logger.debug("Starting with %r", dataContainer.getName()) + # + rD = {} + try: + # Exit if source categories are missing + if not (dataContainer.exists("pdbx_unobs_or_zero_occ_residues") or dataContainer.exists("pdbx_unobs_or_zero_occ_atoms")): + return rD + # ------- --------- ------- --------- ------- --------- ------- --------- ------- --------- + resObj = None + if dataContainer.exists("pdbx_unobs_or_zero_occ_residues"): + resObj = dataContainer.getObj("pdbx_unobs_or_zero_occ_residues") + # + atomObj = None + if dataContainer.exists("pdbx_unobs_or_zero_occ_atoms"): + atomObj = dataContainer.getObj("pdbx_unobs_or_zero_occ_atoms") + # + polyResRngD = {} + if resObj: + for ii in range(resObj.getRowCount()): + modelId = resObj.getValueOrDefault("PDB_model_num", ii, defaultValue=None) + pFlag = resObj.getValueOrDefault("polymer_flag", ii, defaultValue=None) + if pFlag == "Y": + occFlag = resObj.getValueOrDefault("occupancy_flag", ii, defaultValue=None) + zeroOccFlag = int(occFlag) == 0 + asymId = resObj.getValueOrDefault("label_asym_id", ii, defaultValue=None) + # authAsymId = resObj.getValueOrDefault("auth_asym_id", ii, defaultValue=None) + seqId = resObj.getValueOrDefault("label_seq_id", ii, defaultValue=None) + if seqId: + polyResRngD.setdefault((modelId, asymId, zeroOccFlag), []).append(int(seqId)) + # + for tup in polyResRngD: + polyResRngD[tup] = list(self.__toRangeList(polyResRngD[tup])) + logger.debug("polyResRngD %r", polyResRngD) + # + polyAtomRngD = {} + nonPolyMissingAtomD = {} + nonPolyMissingAtomAuthD = {} + if atomObj: + for ii in range(atomObj.getRowCount()): + modelId = atomObj.getValueOrDefault("PDB_model_num", ii, defaultValue=None) + pFlag = atomObj.getValueOrDefault("polymer_flag", ii, defaultValue=None) + occFlag = atomObj.getValueOrDefault("occupancy_flag", ii, defaultValue=None) + zeroOccFlag = occFlag and int(occFlag) == 0 + asymId = atomObj.getValueOrDefault("label_asym_id", ii, defaultValue=None) + if pFlag == "Y": + # authAsymId = resObj.getValueOrDefault("auth_asym_id", ii, defaultValue=None) + seqId = atomObj.getValueOrDefault("label_seq_id", ii, defaultValue=None) + if seqId: + polyAtomRngD.setdefault((modelId, asymId, zeroOccFlag), []).append(int(seqId)) + else: + authAsymId = atomObj.getValueOrDefault("auth_asym_id", ii, defaultValue=None) + authSeqId = atomObj.getValueOrDefault("auth_seq_id", ii, defaultValue=None) + atomId = atomObj.getValueOrDefault("label_atom_id", ii, defaultValue=None) + compId = atomObj.getValueOrDefault("label_comp_id", ii, defaultValue=None) + nonPolyMissingAtomD.setdefault((modelId, compId, asymId, zeroOccFlag), []).append(atomId) + nonPolyMissingAtomAuthD.setdefault((modelId, compId, authAsymId, authSeqId, zeroOccFlag), []).append(atomId) + # + for tup in polyAtomRngD: + polyAtomRngD[tup] = list(self.__toRangeList(polyAtomRngD[tup])) + logger.debug("polyAtomRngD %r", polyAtomRngD) + # + rD = {"polyResRng": polyResRngD, "polyAtomRng": polyAtomRngD, "nonPolyMissingAtomD": nonPolyMissingAtomD, "nonPolyMissingAtomAuthD": nonPolyMissingAtomAuthD} + except Exception as e: + logger.exception("%s failing with %s", dataContainer.getName(), str(e)) + return rD + + def getInstanceModelOutlierInfo(self, dataContainer): + """Return a dictionary of polymer model outliers. 
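+
+        Results are computed once per entry and memoized via __fetchInstanceModelOutliers() below.
+        Note that, as implemented, keys carry a third boolean element (True when the outlier is
+        tied to a label_seq_id position) and values are lists of OutlierValue tuples.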
+ + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {(modelId, asymId): (seqId,compId), ...} + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchInstanceModelOutliers(dataContainer) + return wD["instanceModelOutlierD"] if "instanceModelOutlierD" in wD else {} + + def getInstanceNonpolymerValidationInfo(self, dataContainer): + """Return a dictionary of nonpolymer validation details. + + Args: + dataContainer (object): mmcif.api.mmif.api.DataContainer object instance + + Returns: + dict: {(modelId, asymId): NonpolymerValidationInstance(rsr, rsrCc, bondsRmsZ, anglesRmsZ, missingAtomCount)} + + """ + if not dataContainer or not dataContainer.getName(): + return {} + wD = self.__fetchInstanceModelOutliers(dataContainer) + return wD["instanceModelValidationD"] if "instanceModelValidationD" in wD else {} + + def __fetchInstanceModelOutliers(self, dataContainer): + wD = self.__modelOutliersCache.get(dataContainer.getName()) + if not wD: + wD = self.__getInstanceModelOutliers(dataContainer) + self.__modelOutliersCache.set(dataContainer.getName(), wD) + return wD + + def __getInstanceModelOutliers(self, dataContainer): + """Internal method to assemble model outliers details. + + Args: + dataContainer ([type]): [description] + + Returns: + {"instanceModelOutlierD": {(modelId, asymId): [(compId, seqId, "BOND_OUTLIER", optional_description), ...}} + # + loop_ + _pdbx_vrpt_instance_results.ordinal + _pdbx_vrpt_instance_results.entity_id + _pdbx_vrpt_instance_results.auth_asym_id + _pdbx_vrpt_instance_results.label_asym_id + _pdbx_vrpt_instance_results.label_comp_id + _pdbx_vrpt_instance_results.auth_seq_id + _pdbx_vrpt_instance_results.label_seq_id + _pdbx_vrpt_instance_results.PDB_ins_code + _pdbx_vrpt_instance_results.label_alt_id + _pdbx_vrpt_instance_results.PDB_model_num + _pdbx_vrpt_instance_results.num_H_reduce + _pdbx_vrpt_instance_results.cis_peptide + _pdbx_vrpt_instance_results.natoms_eds + _pdbx_vrpt_instance_results.RSR + _pdbx_vrpt_instance_results.RSRCC + _pdbx_vrpt_instance_results.RSRZ + _pdbx_vrpt_instance_results.OWAB + _pdbx_vrpt_instance_results.average_occupancy + _pdbx_vrpt_instance_results.ramachandran_class + _pdbx_vrpt_instance_results.rotamer_class + _pdbx_vrpt_instance_results.phi + _pdbx_vrpt_instance_results.psi + _pdbx_vrpt_instance_results.mogul_angles_RMSZ + _pdbx_vrpt_instance_results.mogul_bonds_RMSZ + _pdbx_vrpt_instance_results.mogul_RMSZ_num_angles + _pdbx_vrpt_instance_results.mogul_RMSZ_num_bonds + # ... + 302 1 A A TYR 340 343 ? ? 1 9 ? 12 0.108 0.943 0.117 71.350 1.000 Favored m-85 -111.8 6.4 ? ? ? ? + 303 1 A A LYS 341 344 ? ? 1 13 ? 9 0.120 0.955 -0.380 67.860 1.000 Favored mttt -73.3 139.6 ? ? ? ? + 304 1 A A ILE 342 345 ? ? 1 11 ? 8 0.147 0.964 0.799 76.030 1.000 Favored pt -140.0 171.7 ? ? ? ? + 305 1 A A ASN 343 346 ? ? 1 6 ? 8 0.182 0.948 1.114 82.730 1.000 Favored m-80 52.8 49.6 ? ? ? ? + 306 1 A A GLN 344 347 ? ? 1 2 ? 5 0.193 0.807 1.002 97.730 1.000 ? ? ? ? ? ? ? ? + # ... + 307 2 A B PEG 401 . ? A 1 10 ? 14 0.154 0.914 ? 36.150 1.000 ? ? ? ? 0.76 0.64 5 6 + 308 2 A B PEG 401 . ? B 1 10 ? 14 0.154 0.914 ? 36.150 1.000 ? ? ? ? 0.97 0.68 5 6 + 309 3 A C HYO 402 . ? ? 1 ? ? 21 0.108 0.947 ? 35.530 1.000 ? ? ? ? 2.18 4.96 32 23 + 310 4 A D NI 403 . ? ? 1 ? ? 1 0.096 0.999 ? 28.080 1.000 ? ? ? ? ? ? ? ? + 311 5 A E OGA 404 . ? ? 1 3 ? 10 0.104 0.976 ? 30.510 1.000 ? ? ? ? 1.87 3.23 4 3 + 312 6 A F EDO 405 . ? ? 1 6 ? 4 0.097 0.941 ? 42.000 1.000 ? ? ? ? 
0.32 0.80 2 3 + 313 6 A G EDO 406 . ? ? 1 6 ? 4 0.252 0.797 ? 57.320 1.000 ? ? ? ? 0.73 0.61 2 3 + 314 7 A H SR 407 . ? ? 1 ? ? 1 0.143 1.000 ? 30.560 0.840 ? ? ? ? ? ? ? ? + 315 8 A I UNX 408 . ? ? 1 ? ? 1 0.321 0.940 ? 41.340 1.000 ? ? ? ? ? ? ? ? + 316 8 A J UNX 409 . ? ? 1 ? ? 1 0.611 0.922 ? 61.040 1.000 ? ? ? ? ? ? ? ? + # ... + """ + logger.debug("Starting with %r", dataContainer.getName()) + # + rD = {} + try: + # Exit if no source categories are present + if not ( + dataContainer.exists("pdbx_vrpt_instance_results") + or dataContainer.exists("pdbx_vrpt_bond_outliers") + or dataContainer.exists("pdbx_vrpt_angle_outliers") + or dataContainer.exists("pdbx_vrpt_mogul_bond_outliers") + or dataContainer.exists("pdbx_vrpt_mogul_angle_outliers") + ): + return rD + # ------- --------- ------- --------- ------- --------- ------- --------- ------- --------- + nonPolyMissingAtomD = self.getUnobservedNonPolymerAtomInfo(dataContainer) + # + instanceModelOutlierD = {} + instanceModelValidationD = {} + vObj = None + if dataContainer.exists("pdbx_vrpt_bond_outliers"): + vObj = dataContainer.getObj("pdbx_vrpt_bond_outliers") + if vObj: + for ii in range(vObj.getRowCount()): + seqId = vObj.getValueOrDefault("label_seq_id", ii, defaultValue=None) + if seqId: + modelId = vObj.getValueOrDefault("PDB_model_num", ii, defaultValue=None) + asymId = vObj.getValueOrDefault("label_asym_id", ii, defaultValue=None) + compId = vObj.getValueOrDefault("label_comp_id", ii, defaultValue=None) + # + atomI = vObj.getValueOrDefault("atom0", ii, defaultValue=None) + atomJ = vObj.getValueOrDefault("atom1", ii, defaultValue=None) + obsDist = vObj.getValueOrDefault("obs", ii, defaultValue=None) + zVal = vObj.getValueOrDefault("Z", ii, defaultValue=None) + tS = "%s-%s dist=%s Z=%s" % (atomI, atomJ, obsDist, zVal) + # + instanceModelOutlierD.setdefault((modelId, asymId, True), []).append( + OutlierValue( + compId, + int(seqId), + "BOND_OUTLIER", + tS, + ) + ) + # + logger.debug("length instanceModelOutlierD %d", len(instanceModelOutlierD)) + # ---- + vObj = None + if dataContainer.exists("pdbx_vrpt_angle_outliers"): + vObj = dataContainer.getObj("pdbx_vrpt_angle_outliers") + if vObj: + for ii in range(vObj.getRowCount()): + seqId = vObj.getValueOrDefault("label_seq_id", ii, defaultValue=None) + if seqId: + modelId = vObj.getValueOrDefault("PDB_model_num", ii, defaultValue=None) + asymId = vObj.getValueOrDefault("label_asym_id", ii, defaultValue=None) + compId = vObj.getValueOrDefault("label_comp_id", ii, defaultValue=None) + # + atomI = vObj.getValueOrDefault("atom0", ii, defaultValue=None) + atomJ = vObj.getValueOrDefault("atom1", ii, defaultValue=None) + atomK = vObj.getValueOrDefault("atom2", ii, defaultValue=None) + obsDist = vObj.getValueOrDefault("obs", ii, defaultValue=None) + zVal = vObj.getValueOrDefault("Z", ii, defaultValue=None) + tS = "%s-%s-%s angle=%s Z=%s" % (atomI, atomJ, atomK, obsDist, zVal) + # + instanceModelOutlierD.setdefault((modelId, asymId, True), []).append( + OutlierValue( + compId, + int(seqId), + "ANGLE_OUTLIER", + tS, + ) + ) + # + logger.debug("length instanceModelOutlierD %d", len(instanceModelOutlierD)) + # ---- + vObj = None + if dataContainer.exists("pdbx_vrpt_mogul_bond_outliers"): + vObj = dataContainer.getObj("pdbx_vrpt_mogul_bond_outliers") + if vObj: + for ii in range(vObj.getRowCount()): + seqId = vObj.getValueOrDefault("label_seq_id", ii, defaultValue=None) + + modelId = vObj.getValueOrDefault("PDB_model_num", ii, defaultValue=None) + asymId = vObj.getValueOrDefault("label_asym_id", 
ii, defaultValue=None) + compId = vObj.getValueOrDefault("label_comp_id", ii, defaultValue=None) + # + atoms = vObj.getValueOrDefault("atoms", ii, defaultValue=None) + obsDist = vObj.getValueOrDefault("obsval", ii, defaultValue=None) + meanValue = vObj.getValueOrDefault("mean", ii, defaultValue=None) + zVal = vObj.getValueOrDefault("Zscore", ii, defaultValue=None) + tS = "%s angle=%s Z=%s" % (atoms, obsDist, zVal) + # OutlierValue = collections.namedtuple("OutlierValue", "compId, seqId, outlierType, description, reported, reference, uncertaintyValue, uncertaintyType") + if seqId: + instanceModelOutlierD.setdefault((modelId, asymId, True), []).append( + OutlierValue( + compId, + int(seqId), + "MOGUL_BOND_OUTLIER", + tS, + ) + ) + else: + instanceModelOutlierD.setdefault((modelId, asymId, False), []).append( + OutlierValue(compId, None, "MOGUL_BOND_OUTLIER", tS, obsDist, meanValue, zVal, "Z-Score") + ) + # + logger.debug("length instanceModelOutlierD %d", len(instanceModelOutlierD)) + + vObj = None + if dataContainer.exists("pdbx_vrpt_mogul_angle_outliers"): + vObj = dataContainer.getObj("pdbx_vrpt_mogul_angle_outliers") + if vObj: + for ii in range(vObj.getRowCount()): + seqId = vObj.getValueOrDefault("label_seq_id", ii, defaultValue=None) + + modelId = vObj.getValueOrDefault("PDB_model_num", ii, defaultValue=None) + asymId = vObj.getValueOrDefault("label_asym_id", ii, defaultValue=None) + compId = vObj.getValueOrDefault("label_comp_id", ii, defaultValue=None) + # + atoms = vObj.getValueOrDefault("atoms", ii, defaultValue=None) + obsDist = vObj.getValueOrDefault("obsval", ii, defaultValue=None) + meanValue = vObj.getValueOrDefault("mean", ii, defaultValue=None) + zVal = vObj.getValueOrDefault("Zscore", ii, defaultValue=None) + tS = "%s angle=%s Z=%s" % (atoms, obsDist, zVal) + if seqId: + instanceModelOutlierD.setdefault((modelId, asymId, True), []).append( + OutlierValue( + compId, + int(seqId), + "MOGUL_ANGLE_OUTLIER", + tS, + ) + ) + else: + instanceModelOutlierD.setdefault((modelId, asymId, False), []).append( + OutlierValue(compId, None, "MOGUL_ANGLE_OUTLIER", tS, obsDist, meanValue, zVal, "Z-Score") + ) + logger.debug("length instanceModelOutlierD %d", len(instanceModelOutlierD)) + # + # + vObj = None + if dataContainer.exists("pdbx_vrpt_instance_results"): + vObj = dataContainer.getObj("pdbx_vrpt_instance_results") + + if vObj: + logger.debug("Row count for %s: %d", vObj.getName(), vObj.getRowCount()) + for ii in range(vObj.getRowCount()): + seqId = vObj.getValueOrDefault("label_seq_id", ii, defaultValue=None) + modelId = vObj.getValueOrDefault("PDB_model_num", ii, defaultValue=None) + asymId = vObj.getValueOrDefault("label_asym_id", ii, defaultValue=None) + compId = vObj.getValueOrDefault("label_comp_id", ii, defaultValue=None) + # + rotamerClass = vObj.getValueOrDefault("rotamer_class", ii, defaultValue=None) + ramaClass = vObj.getValueOrDefault("ramachandran_class", ii, defaultValue=None) + rsr = vObj.getValueOrDefault("RSR", ii, defaultValue=None) + rsrZ = vObj.getValueOrDefault("RSRZ", ii, defaultValue=None) + rsrCc = vObj.getValueOrDefault("RSRCC", ii, defaultValue=None) + # + anglesRmsZ = vObj.getValueOrDefault("mogul_angles_RMSZ", ii, defaultValue=None) + bondsRmsZ = vObj.getValueOrDefault("mogul_bonds_RMSZ", ii, defaultValue=None) + # + if seqId: + if rotamerClass and rotamerClass.upper() == "OUTLIER": + instanceModelOutlierD.setdefault((modelId, asymId, True), []).append( + OutlierValue( + compId, + int(seqId), + "ROTAMER_OUTLIER", + None, + ) + ) + if ramaClass and 
ramaClass.upper() == "OUTLIER": + instanceModelOutlierD.setdefault((modelId, asymId, True), []).append( + OutlierValue( + compId, + int(seqId), + "RAMACHANDRAN_OUTLIER", + None, + ) + ) + if rsrZ and float(rsrZ) > 2.0: + tS = "%s > 2.0" % rsrZ + instanceModelOutlierD.setdefault((modelId, asymId, True), []).append( + OutlierValue( + compId, + int(seqId), + "RSRZ_OUTLIER", + tS, + ) + ) + if rsrCc and float(rsrCc) < 0.650: + tS = "RSCC < 0.65" + instanceModelOutlierD.setdefault((modelId, asymId, True), []).append( + OutlierValue( + compId, + int(seqId), + "RSCC_OUTLIER", + tS, + ) + ) + else: + if rsrZ and float(rsrZ) > 2.0: + tS = "%s > 2.0" % rsrZ + instanceModelOutlierD.setdefault((modelId, asymId, False), []).append(OutlierValue(compId, None, "RSRZ_OUTLIER", tS, rsr, None, rsrZ, "Z-Score")) + if rsrCc and float(rsrCc) < 0.650: + tS = "RSCC < 0.65" + instanceModelOutlierD.setdefault((modelId, asymId, False), []).append(OutlierValue(compId, None, "RSCC_OUTLIER", tS, rsrCc)) + # NonpolymerValidationFields = ("rsr", "rscc", "mogul_bonds_rmsz", "mogul_angles_rmsz", "heavy_atom_count", "modeled_heavy_atom_count") + # "nonPolyMissingAtomD": {(modelId, compId, asymId, zeroOccFlag): [atomId,...], }, + missingAtomCount = len(nonPolyMissingAtomD[(modelId, compId, asymId, 0)]) if (modelId, compId, asymId, 0) in nonPolyMissingAtomD else 0 + missingAtomCount += len(nonPolyMissingAtomD[(modelId, compId, asymId, 1)]) if (modelId, compId, asymId, 1) in nonPolyMissingAtomD else 0 + instanceModelValidationD[(modelId, asymId, compId)] = NonpolymerValidationInstance( + float(rsr) if rsr else None, + float(rsrCc) if rsrCc else None, + float(bondsRmsZ) if bondsRmsZ else None, + float(anglesRmsZ) if anglesRmsZ else None, + missingAtomCount, + ) + if missingAtomCount > 0: + logger.debug("%s %s missing atom count %d", dataContainer.getName(), compId, missingAtomCount) + # + logger.debug("instanceModelOutlierD %r", instanceModelOutlierD) + logger.debug("instanceModelValidationD %r", instanceModelValidationD) + + rD = {"instanceModelOutlierD": instanceModelOutlierD, "instanceModelValidationD": instanceModelValidationD} + except Exception as e: + logger.exception("%s failing with %s", dataContainer.getName(), str(e)) + return rD diff --git a/rcsb/utils/dictionary/DictMethodEntityHelper.py b/rcsb/utils/dictionary/DictMethodEntityHelper.py new file mode 100644 index 0000000..8bc9eaf --- /dev/null +++ b/rcsb/utils/dictionary/DictMethodEntityHelper.py @@ -0,0 +1,1960 @@ +## +# File: DictMethodEntityHelper.py +# Author: J. Westbrook +# Date: 16-Jul-2019 +# Version: 0.001 Initial version +# +## +""" +Helper class implements methods supporting entity-level item and category methods in the RCSB dictionary extension. 
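+ +A minimal usage sketch (illustrative only; assumes a configured +DictMethodResourceProvider instance ``rP`` and a dataContainer holding a parsed PDBx/mmCIF entry): + + from rcsb.utils.dictionary.DictMethodEntityHelper import DictMethodEntityHelper + + helper = DictMethodEntityHelper(resourceProvider=rP) + # populate rcsb_entity_container_identifiers for each entity + helper.buildContainerEntityIds(dataContainer, "rcsb_entity_container_identifiers") + # add polymer reference sequence alignments (PDB and, if configured, SIFTS) + helper.addPolymerEntityReferenceAlignments(dataContainer, "rcsb_polymer_entity_align")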
+ +""" +__docformat__ = "restructuredtext en" +__author__ = "John Westbrook" +__email__ = "jwest@rcsb.rutgers.edu" +__license__ = "Apache 2.0" + +# pylint: disable=too-many-lines + +import functools +import itertools +import logging +import re + +from collections import defaultdict, OrderedDict + +from mmcif.api.DataCategory import DataCategory +from rcsb.utils.seq.SeqAlign import splitSeqAlignObjList + +logger = logging.getLogger(__name__) + + +def cmpElements(lhs, rhs): + return 0 if (lhs[-1].isdigit() or lhs[-1] in ["R", "S"]) and rhs[0].isdigit() else -1 + + +class DictMethodEntityHelper(object): + """Helper class implements methods supporting entity-level item and category methods in the RCSB dictionary extension.""" + + def __init__(self, **kwargs): + """ + Args: + resourceProvider: (obj) instance of DictMethodResourceProvider() + + """ + # + self._raiseExceptions = kwargs.get("raiseExceptions", False) + self.__wsPattern = re.compile(r"\s+", flags=re.UNICODE | re.MULTILINE) + self.__reNonDigit = re.compile(r"[^\d]+") + # + rP = kwargs.get("resourceProvider") + self.__commonU = rP.getResource("DictMethodCommonUtils instance") if rP else None + self.__dApi = rP.getResource("Dictionary API instance (pdbx_core)") if rP else None + # + self.__useSiftsAlign = rP.getReferenceSequenceAlignmentOpt() == "SIFTS" + # logger.info("SIFTS alignment option %r", self.__useSiftsAlign) + # + self.__ssP = None + if self.__useSiftsAlign: + self.__ssP = rP.getResource("SiftsSummaryProvider instance") if rP else None + self.__useSiftsAlign = False if not self.__ssP else self.__useSiftsAlign + # + self.__ccP = rP.getResource("ChemCompProvider instance") if rP else None + + # + logger.debug("Dictionary entity method helper init") + + def __processSiftsAlignments(self, dataContainer): + # + tObj = dataContainer.getObj("entry") + entryId = tObj.getValue("id", 0) + # + asymIdD = self.__commonU.getInstanceEntityMap(dataContainer) + asymAuthIdD = self.__commonU.getAsymAuthIdMap(dataContainer) + instTypeD = self.__commonU.getInstanceTypes(dataContainer) + siftsEntityAlignD = {} + # + # Process sifts alignments - + siftsAlignD = {} + for asymId, authAsymId in asymAuthIdD.items(): + if instTypeD[asymId] not in ["polymer", "branched"]: + continue + entityId = asymIdD[asymId] + # accumulate the sifts alignments by entity. 
+ # siftsAlignD.setdefault((entryId, entityId), []).extend([SeqAlign("SIFTS", **sa) for sa in self.__ssP.getIdentifiers(entryId, authAsymId, idType="UNPAL")]) + siftsAlignD.setdefault((entryId, entityId), []).extend(self.__ssP.getSeqAlignObjList(entryId, authAsymId)) + for (entryId, entityId), seqAlignObjL in siftsAlignD.items(): + if seqAlignObjL: + # re-group alignments by common accession + alRefD = {} + for seqAlignObj in seqAlignObjL: + alRefD.setdefault((seqAlignObj.getDbName(), seqAlignObj.getDbAccession(), seqAlignObj.getDbIsoform()), []).append(seqAlignObj) + # + # Get the longest overlapping entity region of each ref alignment - + for (dbName, dbAcc, dbIsoform), aL in alRefD.items(): + alGrpD = splitSeqAlignObjList(aL) + logger.debug("SIFTS -> entryId %s entityId %s dbName %r dbAcc %r dbIsoform %r alGrpD %r", entryId, entityId, dbName, dbAcc, dbIsoform, alGrpD) + for _, grpAlignL in alGrpD.items(): + + lenL = [seqAlignObj.getEntityAlignLength() for seqAlignObj in grpAlignL] + idxMax = lenL.index(max(lenL)) + siftsEntityAlignD.setdefault((entryId, entityId, "SIFTS"), {}).setdefault((dbName, dbAcc, dbIsoform), []).append(grpAlignL[idxMax]) + # + logger.debug("PROCESSED SIFTS -> %r", siftsEntityAlignD) + return siftsEntityAlignD + + def __processPdbAlignments(self, dataContainer): + # + tObj = dataContainer.getObj("entry") + entryId = tObj.getValue("id", 0) + # + entityRefAlignmentD = self.__commonU.getEntityReferenceAlignments(dataContainer) + pdbEntityAlignD = {} + # --- PDB alignments - + for entityId, seqAlignObjL in entityRefAlignmentD.items(): + # seqAlignObjL = [SeqAlign("PDB", **sa) for sa in entityAlignL] + if seqAlignObjL: + alRefD = {} + for seqAlignObj in seqAlignObjL: + alRefD.setdefault((seqAlignObj.getDbName(), seqAlignObj.getDbAccession(), seqAlignObj.getDbIsoform()), []).append(seqAlignObj) + for (dbName, dbAcc, dbIsoform), aL in alRefD.items(): + alGrpD = splitSeqAlignObjList(aL) + logger.debug("PDB -> entryId %s entityId %s dbName %r dbAcc %r dbIsoform %r alGrpD %r", entryId, entityId, dbName, dbAcc, dbIsoform, alGrpD) + for _, grpAlignL in alGrpD.items(): + # get the longest overlapping entity region of each ref seq - + lenL = [seqAlignObj.getEntityAlignLength() for seqAlignObj in grpAlignL] + idxMax = lenL.index(max(lenL)) + try: + tLen = grpAlignL[idxMax].getEntityAlignLength() + if tLen and tLen > 0: + pdbEntityAlignD.setdefault((entryId, entityId, "PDB"), {}).setdefault((dbName, dbAcc, dbIsoform), []).append(grpAlignL[idxMax]) + else: + logger.warning("Skipping %s inconsistent alignment for entity %r %r", entryId, entityId, seqAlignObjL) + except Exception: + pass + # + logger.debug("PROCESSED PDB -> %r", pdbEntityAlignD) + return pdbEntityAlignD + + def addPolymerEntityReferenceAlignments(self, dataContainer, catName, **kwargs): + """Build category rcsb_polymer_entity_align with consolidated reference sequence alignments for each polymer entity. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + Example: + _rcsb_polymer_entity_align.ordinal + _rcsb_polymer_entity_align.entry_id + _rcsb_polymer_entity_align.entity_id + # + _rcsb_polymer_entity_align.reference_database_name + _rcsb_polymer_entity_align.reference_database_accession + _rcsb_polymer_entity_align.provenance_source + # + _rcsb_polymer_entity_align.aligned_regions_ref_beg_seq_id + _rcsb_polymer_entity_align.aligned_regions_entity_beg_seq_id + _rcsb_polymer_entity_align.aligned_regions_length + # + """ + dbNameMapD = self.__commonU.getDatabaseNameMap() + logger.debug("Starting %s catName %s kwargs %r", dataContainer.getName(),
catName, kwargs) + try: + if not (dataContainer.exists("entry") and dataContainer.exists("entity")): + return False + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + cObj = dataContainer.getObj(catName) + # + pdbEntityAlignD = self.__processPdbAlignments(dataContainer) + # + if self.__useSiftsAlign: + siftsEntityAlignD = self.__processSiftsAlignments(dataContainer) + logger.debug("siftsEntityAlignD %d", len(siftsEntityAlignD)) + # + for (entryId, entityId, provSource), refD in siftsEntityAlignD.items(): + if (entryId, entityId, "PDB") in pdbEntityAlignD: + del pdbEntityAlignD[(entryId, entityId, "PDB")] + pdbEntityAlignD.update({(entryId, entityId, provSource): refD}) + # + # --- + + iRow = cObj.getRowCount() + for (entryId, entityId, provSource), refD in pdbEntityAlignD.items(): + # + for (dbName, dbAcc, dbIsoform), saoL in refD.items(): + # + if dbName not in dbNameMapD: + logger.error("Skipping unsupported reference database %r for entry %s entity %s", dbName, entryId, entityId) + continue + # + cObj.setValue(iRow + 1, "ordinal", iRow) + cObj.setValue(entryId, "entry_id", iRow) + cObj.setValue(entityId, "entity_id", iRow) + # + dispDbName = dbNameMapD[dbName] + cObj.setValue(dispDbName, "reference_database_name", iRow) + cObj.setValue(dbAcc, "reference_database_accession", iRow) + if dbIsoform: + cObj.setValue(dbIsoform, "reference_database_isoform", iRow) + cObj.setValue(provSource, "provenance_source", iRow) + # + cObj.setValue(",".join([str(sao.getDbSeqIdBeg()) for sao in saoL]), "aligned_regions_ref_beg_seq_id", iRow) + cObj.setValue(",".join([str(sao.getEntitySeqIdBeg()) for sao in saoL]), "aligned_regions_entity_beg_seq_id", iRow) + cObj.setValue(",".join([str(sao.getEntityAlignLength()) for sao in saoL]), "aligned_regions_length", iRow) + iRow += 1 + + return True + except Exception as e: + logger.exception("For %s %s failing with %s", dataContainer.getName(), catName, str(e)) + return False + + # + + def buildContainerEntityIds(self, dataContainer, catName, **kwargs): + """Load the input category with rcsb_entity_container_identifiers content. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For example, build: + + loop_ + _rcsb_entity_container_identifiers.entry_id + _rcsb_entity_container_identifiers.entity_id + # + _rcsb_entity_container_identifiers.asym_ids + _rcsb_entity_container_identifiers.auth_asym_ids + # + _rcsb_entity_container_identifiers.nonpolymer_comp_id + _rcsb_entity_container_identifiers.chem_comp_monomers + + _rcsb_entity_container_identifiers.prd_id + ...
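+ + For instance (illustrative values only): + + _rcsb_entity_container_identifiers.entry_id 1ABC + _rcsb_entity_container_identifiers.entity_id 1 + _rcsb_entity_container_identifiers.asym_ids A,B + _rcsb_entity_container_identifiers.auth_asym_ids A,B + _rcsb_entity_container_identifiers.chem_comp_monomers ALA,GLY,MSE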
+ """ + logger.debug("Starting catName %s kwargs %r", catName, kwargs) + try: + if not (dataContainer.exists("entry") and dataContainer.exists("entity")): + return False + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + cObj = dataContainer.getObj(catName) + # + tObj = dataContainer.getObj("entry") + entryId = tObj.getValue("id", 0) + cObj.setValue(entryId, "entry_id", 0) + # + tObj = dataContainer.getObj("entity") + entityIdL = tObj.getAttributeValueList("id") + seqEntityRefDbD = self.__commonU.getEntitySequenceReferenceCodes(dataContainer) + # + entityTypeUniqueIds = self.__commonU.getEntityTypeUniqueIds(dataContainer) + entityPolymerModMonomerIds = self.__commonU.getPolymerEntityModifiedMonomers(dataContainer) + # ------- + eTypeD = self.__commonU.getEntityTypes(dataContainer) + aObj = dataContainer.getObj("struct_asym") + if not aObj.hasAttribute("rcsb_entity_type"): + aObj.appendAttribute("rcsb_entity_type") + for ii in range(aObj.getRowCount()): + entityId = aObj.getValue("entity_id", ii) + aObj.setValue(eTypeD[entityId], "rcsb_entity_type", ii) + # --------- + ii = 0 + for entityId in entityIdL: + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(entryId + "_" + entityId, "rcsb_id", ii) + eType = tObj.getValue("type", ii) + asymIdL = [] + authAsymIdL = [] + ccMonomerL = [] + ccLigandL = [] + modPolymerMonomerL = entityPolymerModMonomerIds[entityId] if entityId in entityPolymerModMonomerIds else [] + # + refSeqIdD = {"dbName": [], "dbAccession": [], "provSource": [], "dbIsoform": []} + + asymIdL = entityTypeUniqueIds[eType][entityId]["asymIds"] if eType in entityTypeUniqueIds else [] + authAsymIdL = entityTypeUniqueIds[eType][entityId]["authAsymIds"] if eType in entityTypeUniqueIds else [] + ccMonomerL = entityTypeUniqueIds[eType][entityId]["ccIds"] if eType in entityTypeUniqueIds else [] + + if eType in ["polymer", "non-polymer", "branched"] and not asymIdL: + logger.warning("%s inconsistent molecular system (no instances) for %r entity %s", entryId, eType, entityId) + # + if eType == "polymer": + + if self.__useSiftsAlign: + dbIdL = [] + for authAsymId in authAsymIdL: + dbIdL.extend(self.__ssP.getIdentifiers(entryId, authAsymId, idType="UNPID")) + # If SIFTS is defined + if dbIdL: + for dbId in sorted(set(dbIdL)): + refSeqIdD["dbName"].append("UniProt") + refSeqIdD["provSource"].append("SIFTS") + refSeqIdD["dbAccession"].append(dbId) + refSeqIdD["dbIsoform"].append("?") + # else fallback to PDB + elif entityId in seqEntityRefDbD: + for dbD in seqEntityRefDbD[entityId]: + refSeqIdD["dbName"].append(dbD["dbName"]) + refSeqIdD["provSource"].append("PDB") + refSeqIdD["dbAccession"].append(dbD["dbAccession"]) + # + if dbD["dbIsoform"]: + refSeqIdD["dbIsoform"].append(dbD["dbIsoform"]) + else: + refSeqIdD["dbIsoform"].append("?") + + else: + if entityId in seqEntityRefDbD: + for dbD in seqEntityRefDbD[entityId]: + refSeqIdD["dbName"].append(dbD["dbName"]) + refSeqIdD["provSource"].append("PDB") + refSeqIdD["dbAccession"].append(dbD["dbAccession"]) + # + if dbD["dbIsoform"]: + refSeqIdD["dbIsoform"].append(dbD["dbIsoform"]) + else: + refSeqIdD["dbIsoform"].append("?") + + # + # logger.info("refSeqIdD %r %r %r", entryId, entityId, refSeqIdD) + + if asymIdL: + cObj.setValue(",".join(sorted(set(asymIdL))).strip(), "asym_ids", ii) + if authAsymIdL: + cObj.setValue(",".join(sorted(set(authAsymIdL))).strip(), "auth_asym_ids", ii) + if ccMonomerL 
and eType in ["branched", "polymer"]: + cObj.setValue(",".join(sorted(set(ccMonomerL))).strip(), "chem_comp_monomers", ii) + else: + cObj.setValue("?", "chem_comp_monomers", ii) + # + if modPolymerMonomerL: + cObj.setValue(",".join(sorted(set(modPolymerMonomerL))).strip(), "chem_comp_nstd_monomers", ii) + else: + cObj.setValue("?", "chem_comp_nstd_monomers", ii) + # + if eType in ["non-polymer"] and ccMonomerL: + cObj.setValue(",".join(sorted(set(ccLigandL))).strip(), "nonpolymer_comp_id", ii) + else: + cObj.setValue("?", "nonpolymer_comp_id", ii) + # + if refSeqIdD["dbName"]: + cObj.setValue(",".join(refSeqIdD["dbName"]).strip(), "reference_sequence_identifiers_database_name", ii) + cObj.setValue(",".join(refSeqIdD["dbAccession"]).strip(), "reference_sequence_identifiers_database_accession", ii) + cObj.setValue(",".join(refSeqIdD["provSource"]).strip(), "reference_sequence_identifiers_provenance_source", ii) + cObj.setValue(",".join(refSeqIdD["dbIsoform"]).strip(), "reference_sequence_identifiers_database_isoform", ii) + # + + ii += 1 + _ = self.__addEntityCompIds(dataContainer) + _ = self.__addBirdEntityIds(dataContainer) + + return True + except Exception as e: + logger.exception("For %s %s failing with %s", dataContainer.getName(), catName, str(e)) + return False + + def __salvageMissingTaxonomy(self, dataContainer, **kwargs): + """Add missing taxonomy identifiers using scientific name as a surogate. + + Args: + dataContainer (obj): data container object + + Returns: + bool: True for success or False otherwise + """ + # + ok = False + try: + rP = kwargs.get("resourceProvider") + taxU = rP.getResource("TaxonomyProvider instance") if rP else None + # "pdbx_gene_src_scientific_name" "pdbx_gene_src_ncbi_taxonomy_id" + for catName, atSn, atTaxId in [ + ("entity_src_gen", "pdbx_gene_src_scientific_name", "pdbx_gene_src_ncbi_taxonomy_id"), + ("entity_src_gen", "pdbx_host_org_scientific_name", "pdbx_host_org_ncbi_taxonomy_id"), + ("entity_src_nat", "pdbx_organism_scientific", "pdbx_ncbi_taxonomy_id"), + ("entity_src_syn", "organism_scientific", "ncbi_taxonomy_id"), + ]: + if dataContainer.exists(catName): + sObj = dataContainer.getObj(catName) + for ii in range(sObj.getRowCount()): + taxId = sObj.getValueOrDefault(atTaxId, ii, defaultValue=None) + # + if taxId: + continue + sn = sObj.getValueOrDefault(atSn, ii, defaultValue=None) + if sn: + taxId = taxU.getTaxId(sn) + if taxId: + if not sObj.hasAttribute(atTaxId): + sObj.appendAttribute(atTaxId) + logger.debug("%s salvaged taxId %r using %r", dataContainer.getName(), taxId, sn) + sObj.setValue(str(taxId), atTaxId, ii) + else: + logger.warning("%s taxId salvage fails for scientific name %s", dataContainer.getName(), sn) + ok = True + except Exception as e: + logger.exception("Failing for %r with %s", dataContainer.getName(), str(e)) + + return ok + + def filterSourceOrganismDetails(self, dataContainer, catName, **kwargs): + """Load new categories rcsb_entity_source_organism and rcsb_entity_host_organism + and add related source flags in the entity category. 
+ + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For instance, select relevant source and host organism details from + primary data categories and load the following. + + Build: + loop_ + _rcsb_entity_source_organism.entity_id + _rcsb_entity_source_organism.pdbx_src_id + _rcsb_entity_source_organism.source_type + _rcsb_entity_source_organism.scientific_name + _rcsb_entity_source_organism.common_name + _rcsb_entity_source_organism.ncbi_taxonomy_id + _rcsb_entity_source_organism.provenance_source + _rcsb_entity_source_organism.beg_seq_num + _rcsb_entity_source_organism.end_seq_num + _rcsb_entity_source_organism.taxonomy_lineage_id + _rcsb_entity_source_organism.taxonomy_lineage_name + _rcsb_entity_source_organism.taxonomy_lineage_depth + 1 1 natural 'Homo sapiens' human 9606 'PDB Primary Data' 1 202 . . . + # ... abbreviated + + + loop_ + _rcsb_entity_host_organism.entity_id + _rcsb_entity_host_organism.pdbx_src_id + _rcsb_entity_host_organism.scientific_name + _rcsb_entity_host_organism.common_name + _rcsb_entity_host_organism.ncbi_taxonomy_id + _rcsb_entity_host_organism.provenance_source + _rcsb_entity_host_organism.beg_seq_num + _rcsb_entity_host_organism.end_seq_num + _rcsb_entity_host_organism.taxonomy_lineage_id + _rcsb_entity_host_organism.taxonomy_lineage_name + _rcsb_entity_host_organism.taxonomy_lineage_depth + 1 1 'Escherichia coli' 'E. coli' 562 'PDB Primary Data' 1 102 . . . + # ... abbreviated + + And two related items - + + _entity.rcsb_multiple_source_flag + _entity.rcsb_source_part_count + + """ + # + hostCatName = "rcsb_entity_host_organism" + try: + logger.debug("Starting with %r %r", dataContainer.getName(), catName) + if catName == hostCatName: + logger.debug("Skipping method for %r %r", dataContainer.getName(), catName) + return True + # + # if there is no source information then exit + if not (dataContainer.exists("entity_src_gen") or dataContainer.exists("entity_src_nat") or dataContainer.exists("pdbx_entity_src_syn")): + return False + # + # Try to supply missing taxIds + self.__salvageMissingTaxonomy(dataContainer, **kwargs) + # + # Create the new target category + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + if not dataContainer.exists(hostCatName): + dataContainer.append(DataCategory(hostCatName, attributeNameList=self.__dApi.getAttributeNameList(hostCatName))) + # + rP = kwargs.get("resourceProvider") + taxU = rP.getResource("TaxonomyProvider instance") if rP else None + # + cObj = dataContainer.getObj(catName) + hObj = dataContainer.getObj(hostCatName) + # + s1Obj = dataContainer.getObj("entity_src_gen") + atHTupL = [ + ("entity_id", "entity_id"), + ("pdbx_host_org_scientific_name", "scientific_name"), + ("pdbx_host_org_common_name", "common_name"), + ("pdbx_host_org_ncbi_taxonomy_id", "ncbi_taxonomy_id"), + ("pdbx_src_id", "pdbx_src_id"), + ("pdbx_beg_seq_num", "beg_seq_num"), + ("pdbx_end_seq_num", "end_seq_num"), + ] + atHSL, atHL = self.__getAttribList(s1Obj, atHTupL) + # + at1TupL = [ + ("entity_id", "entity_id"), + ("pdbx_gene_src_scientific_name", "scientific_name"), + ("gene_src_common_name", "common_name"), + ("pdbx_gene_src_ncbi_taxonomy_id", "ncbi_taxonomy_id"), + ("pdbx_src_id", "pdbx_src_id"), + ("pdbx_beg_seq_num", "beg_seq_num"), + ("pdbx_end_seq_num", "end_seq_num"), + ("pdbx_gene_src_gene", "rcsb_gene_name_value"), + ] + at1SL, at1L =
self.__getAttribList(s1Obj, at1TupL) + # + s2Obj = dataContainer.getObj("entity_src_nat") + at2TupL = [ + ("entity_id", "entity_id"), + ("pdbx_organism_scientific", "scientific_name"), + ("nat_common_name", "common_name"), + ("pdbx_ncbi_taxonomy_id", "ncbi_taxonomy_id"), + ("pdbx_src_id", "pdbx_src_id"), + ("pdbx_beg_seq_num", "beg_seq_num"), + ("pdbx_end_seq_num", "end_seq_num"), + ] + at2SL, at2L = self.__getAttribList(s2Obj, at2TupL) + # + s3Obj = dataContainer.getObj("pdbx_entity_src_syn") + at3TupL = [ + ("entity_id", "entity_id"), + ("organism_scientific", "scientific_name"), + ("organism_common_name", "common_name"), + ("ncbi_taxonomy_id", "ncbi_taxonomy_id"), + ("pdbx_src_id", "pdbx_src_id"), + ("beg_seq_num", "beg_seq_num"), + ("end_seq_num", "end_seq_num"), + ] + at3SL, at3L = self.__getAttribList(s3Obj, at3TupL) + # + eObj = dataContainer.getObj("entity") + entityIdL = eObj.getAttributeValueList("id") + provSource = "PDB Primary Data" + # + partCountD = {} + srcL = [] + hostL = [] + for entityId in entityIdL: + partCountD[entityId] = 0 + eL = [] + tf = False + if s1Obj: + sType = "genetically engineered" + vL = s1Obj.selectValueListWhere(at1SL, entityId, "entity_id") + if vL: + for v in vL: + eL.append((entityId, sType, at1L, v)) + logger.debug("%r entity %r - %r", sType, entityId, vL) + partCountD[entityId] = len(eL) + srcL.extend(eL) + tf = True + # + vL = s1Obj.selectValueListWhere(atHSL, entityId, "entity_id") + if vL: + for v in vL: + hostL.append((entityId, sType, atHL, v)) + logger.debug("%r entity %r - %r", sType, entityId, vL) + if tf: + continue + + if s2Obj: + sType = "natural" + vL = s2Obj.selectValueListWhere(at2SL, entityId, "entity_id") + if vL: + for v in vL: + eL.append((entityId, sType, at2L, v)) + logger.debug("%r entity %r - %r", sType, entityId, vL) + partCountD[entityId] = len(eL) + srcL.extend(eL) + continue + + if s3Obj: + sType = "synthetic" + vL = s3Obj.selectValueListWhere(at3SL, entityId, "entity_id") + if vL: + for v in vL: + eL.append((entityId, sType, at3L, v)) + logger.debug("%r entity %r - %r", sType, entityId, vL) + partCountD[entityId] = len(eL) + srcL.extend(eL) + continue + + iRow = 0 + entryTaxIdD = defaultdict(int) + entityTaxIdD = {} + for (entityId, sType, atL, tv) in srcL: + ii = atL.index("ncbi_taxonomy_id") if "ncbi_taxonomy_id" in atL else -1 + if ii > 0 and len(tv[ii].split(",")) > 1: + tvL = self.__normalizeCsvToList(dataContainer.getName(), tv) + ii = atL.index("pdbx_src_id") if "pdbx_src_id" in atL else -1 + for jj, row in enumerate(tvL, 1): + row[ii] = str(jj) + partCountD[entityId] = len(tvL) + else: + tvL = [tv] + for v in tvL: + cObj.setValue(sType, "source_type", iRow) + cObj.setValue(provSource, "provenance_source", iRow) + for ii, at in enumerate(atL): + # add check for missing values here + if at in ["rcsb_gene_name_value"] and v[ii] and v[ii] not in [".", "?"]: + tgL = v[ii].split(",") + fgL = self.__filterCaseDuplicates(tgL) + cObj.setValue(";".join(fgL), at, iRow) + cObj.setValue(";".join([provSource for jj in range(len(tgL))]), "rcsb_gene_name_provenance_source", iRow) + else: + cObj.setValue(v[ii], at, iRow) + # if at == 'ncbi_taxonomy_id' and v[ii] and v[ii] not in ['.', '?'] and v[ii].isdigit(): + if at == "ncbi_taxonomy_id" and v[ii] and v[ii] not in [".", "?"]: + taxId = int(self.__reNonDigit.sub("", v[ii])) + taxId = taxU.getMergedTaxId(taxId) + cObj.setValue(str(taxId), "ncbi_taxonomy_id", iRow) + entryTaxIdD[taxId] += 1 + entityTaxIdD.setdefault(entityId, set()).add(taxId) + # + sn = 
taxU.getScientificName(taxId) + if sn: + cObj.setValue(sn, "ncbi_scientific_name", iRow) + # + psn = taxU.getParentScientificName(taxId) + if psn: + cObj.setValue(psn, "ncbi_parent_scientific_name", iRow) + # + cnL = taxU.getCommonNames(taxId) + if cnL: + fcnL = self.__filterCaseDuplicates(cnL) + cObj.setValue(";".join(list(OrderedDict.fromkeys(fcnL))), "ncbi_common_names", iRow) + # Add lineage - + linL = taxU.getLineageWithNames(taxId) + if linL is not None: + cObj.setValue(";".join([str(tup[0]) for tup in OrderedDict.fromkeys(linL)]), "taxonomy_lineage_depth", iRow) + cObj.setValue(";".join([str(tup[1]) for tup in OrderedDict.fromkeys(linL)]), "taxonomy_lineage_id", iRow) + cObj.setValue(";".join([str(tup[2]) for tup in OrderedDict.fromkeys(linL)]), "taxonomy_lineage_name", iRow) + else: + logger.warning("%s taxId %r lineage %r", dataContainer.getName(), taxId, linL) + + logger.debug("%r entity %r - UPDATED %r %r", sType, entityId, atL, v) + iRow += 1 + # + iRow = 0 + for (entityId, sType, atL, tv) in hostL: + ii = atL.index("ncbi_taxonomy_id") if "ncbi_taxonomy_id" in atL else -1 + if ii > 0 and len(tv[ii].split(",")) > 1: + tvL = self.__normalizeCsvToList(dataContainer.getName(), tv) + ii = atL.index("pdbx_src_id") if "pdbx_src_id" in atL else -1 + for jj, row in enumerate(tvL, 1): + row[ii] = str(jj) + # partCountD[entityId] = len(tvL) + else: + tvL = [tv] + for v in tvL: + hObj.setValue(provSource, "provenance_source", iRow) + for ii, at in enumerate(atL): + hObj.setValue(v[ii], at, iRow) + # if at == 'ncbi_taxonomy_id' and v[ii] and v[ii] not in ['.', '?'] and v[ii].isdigit(): + if at == "ncbi_taxonomy_id" and v[ii] and v[ii] not in [".", "?"]: + taxId = int(self.__reNonDigit.sub("", v[ii])) + taxId = taxU.getMergedTaxId(taxId) + hObj.setValue(str(taxId), "ncbi_taxonomy_id", iRow) + sn = taxU.getScientificName(taxId) + if sn: + hObj.setValue(sn, "ncbi_scientific_name", iRow) + # + psn = taxU.getParentScientificName(taxId) + if psn: + hObj.setValue(psn, "ncbi_parent_scientific_name", iRow) + # + cnL = taxU.getCommonNames(taxId) + if cnL: + hObj.setValue(";".join(sorted(set(cnL))), "ncbi_common_names", iRow) + # Add lineage - + linL = taxU.getLineageWithNames(taxId) + if linL is not None: + hObj.setValue(";".join([str(tup[0]) for tup in OrderedDict.fromkeys(linL)]), "taxonomy_lineage_depth", iRow) + hObj.setValue(";".join([str(tup[1]) for tup in OrderedDict.fromkeys(linL)]), "taxonomy_lineage_id", iRow) + hObj.setValue(";".join([str(tup[2]) for tup in OrderedDict.fromkeys(linL)]), "taxonomy_lineage_name", iRow) + else: + logger.warning("%s taxId %r lineage %r", dataContainer.getName(), taxId, linL) + logger.debug("%r entity %r - UPDATED %r %r", sType, entityId, atL, v) + iRow += 1 + # ------------------------------------------------------------------------- + # ------------------------------------------------------------------------- + # Update entity attributes + # _entity.rcsb_multiple_source_flag + # _entity.rcsb_source_part_count + for atName in ["rcsb_source_part_count", "rcsb_multiple_source_flag", "rcsb_source_taxonomy_count"]: + if not eObj.hasAttribute(atName): + eObj.appendAttribute(atName) + # + taxCountTotal = 0 + for ii in range(eObj.getRowCount()): + entityId = eObj.getValue("id", ii) + cFlag = "Y" if partCountD[entityId] > 1 else "N" + eObj.setValue(partCountD[entityId], "rcsb_source_part_count", ii) + eObj.setValue(cFlag, "rcsb_multiple_source_flag", ii) + taxCount = len(entityTaxIdD[entityId]) if entityId in entityTaxIdD else 0 + eObj.setValue(taxCount, 
"rcsb_source_taxonomy_count", ii) + taxCountTotal += taxCount + + logger.debug("Entities with taxonomies %d entry total taxonomy count is %d", len(entryTaxIdD), taxCountTotal) + if dataContainer.exists("rcsb_entry_info"): + eiObj = dataContainer.getObj("rcsb_entry_info") + eiObj.setValue(taxCountTotal, "polymer_entity_taxonomy_count", 0) + # + return True + except Exception as e: + logger.exception("In %s for %s failing with %s", dataContainer.getName(), catName, str(e)) + return False + + def __addEntityCompIds(self, dataContainer): + """Add entity_id and BIRD codes to selected categories. + + Args: + dataContainer (object): mmif.api.DataContainer object instance + + Returns: + bool: True for success or False otherwise + + For example, update/add identifiers: + + loop_ + _pdbx_entity_nonpoly.entity_id + _pdbx_entity_nonpoly.name + _pdbx_entity_nonpoly.comp_id + + """ + try: + eD = {} + if dataContainer.exists("pdbx_entity_nonpoly"): + npObj = dataContainer.getObj("pdbx_entity_nonpoly") + for ii in range(npObj.getRowCount()): + entityId = npObj.getValue("entity_id", ii) + compId = npObj.getValue("comp_id", ii) + eD[entityId] = compId + + if dataContainer.exists("rcsb_entity_container_identifiers"): + pObj = dataContainer.getObj("rcsb_entity_container_identifiers") + if not pObj.hasAttribute("nonpolymer_comp_id"): + pObj.appendAttribute("nonpolymer_comp_id") + for ii in range(pObj.getRowCount()): + entityId = pObj.getValue("entity_id", ii) + compId = eD[entityId] if entityId in eD else "?" + pObj.setValue(compId, "nonpolymer_comp_id", ii) + # + return True + except Exception as e: + logger.exception("%s failing with %s", dataContainer.getName(), str(e)) + return False + + def __addBirdEntityIds(self, dataContainer): + """Add entity_id and BIRD codes to selected categories. 
+ + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + bool: True for success or False otherwise + + For example, update/add identifiers: + + loop_ + _pdbx_molecule.instance_id + _pdbx_molecule.prd_id + _pdbx_molecule.asym_id + + loop_ + _pdbx_entity_nonpoly.entity_id + _pdbx_entity_nonpoly.name + _pdbx_entity_nonpoly.comp_id + + with: + + _pdbx_molecule.rcsb_entity_id + _pdbx_molecule.rcsb_comp_id + + _pdbx_entity_nonpoly.rcsb_prd_id + _entity_poly.rcsb_prd_id + + _rcsb_entity_container_identifiers.prd_id + + """ + catName = "pdbx_molecule" + atName = "rcsb_entity_id" + try: + + logger.debug("Starting catName %s atName %s", catName, atName) + if catName != "pdbx_molecule" and atName != "rcsb_entity_id": + return False + # + if not (dataContainer.exists(catName) and dataContainer.exists("struct_asym")): + return False + # + cObj = dataContainer.getObj(catName) + if not cObj.hasAttribute(atName): + cObj.appendAttribute(atName) + # + if not cObj.hasAttribute("rcsb_comp_id"): + cObj.appendAttribute("rcsb_comp_id") + # + aD = {} + aObj = dataContainer.getObj("struct_asym") + for ii in range(aObj.getRowCount()): + entityId = aObj.getValue("entity_id", ii) + asymId = aObj.getValue("id", ii) + aD[asymId] = entityId + # + eD = {} + if dataContainer.exists("pdbx_entity_nonpoly"): + npObj = dataContainer.getObj("pdbx_entity_nonpoly") + for ii in range(npObj.getRowCount()): + entityId = npObj.getValue("entity_id", ii) + compId = npObj.getValue("comp_id", ii) + eD[entityId] = compId + # + # + prdD = {} + for ii in range(cObj.getRowCount()): + asymId = cObj.getValue("asym_id", ii) + prdId = cObj.getValue("prd_id", ii) + if asymId in aD: + entityId = aD[asymId] + prdD[entityId] = prdId + cObj.setValue(entityId, atName, ii) + compId = eD[entityId] if entityId in eD else "." + cObj.setValue(compId, "rcsb_comp_id", ii) + else: + logger.error("%s missing entityId for asymId %s", dataContainer.getName(), asymId) + # + if prdD and dataContainer.exists("pdbx_entity_nonpoly"): + npObj = dataContainer.getObj("pdbx_entity_nonpoly") + if not npObj.hasAttribute("rcsb_prd_id"): + npObj.appendAttribute("rcsb_prd_id") + for ii in range(npObj.getRowCount()): + entityId = npObj.getValue("entity_id", ii) + prdId = prdD[entityId] if entityId in prdD else "." + npObj.setValue(prdId, "rcsb_prd_id", ii) + # + if prdD and dataContainer.exists("entity_poly"): + pObj = dataContainer.getObj("entity_poly") + if not pObj.hasAttribute("rcsb_prd_id"): + pObj.appendAttribute("rcsb_prd_id") + for ii in range(pObj.getRowCount()): + entityId = pObj.getValue("entity_id", ii) + prdId = prdD[entityId] if entityId in prdD else "." + pObj.setValue(prdId, "rcsb_prd_id", ii) + # + # + if prdD and dataContainer.exists("rcsb_entity_container_identifiers"): + pObj = dataContainer.getObj("rcsb_entity_container_identifiers") + if not pObj.hasAttribute("prd_id"): + pObj.appendAttribute("prd_id") + if not pObj.hasAttribute("nonpolymer_comp_id"): + pObj.appendAttribute("nonpolymer_comp_id") + for ii in range(pObj.getRowCount()): + entityId = pObj.getValue("entity_id", ii) + prdId = prdD[entityId] if entityId in prdD else "?" + pObj.setValue(prdId, "prd_id", ii) + compId = eD[entityId] if entityId in eD else "?"
+ pObj.setValue(compId, "nonpolymer_comp_id", ii) + + # + # + return True + except Exception as e: + logger.exception("%s %s %s failing with %s", dataContainer.getName(), catName, atName, str(e)) + return False + + def addStructRefSeqEntityIds(self, dataContainer, catName, **kwargs): + """Add entity ids to categories struct_ref_seq and struct_ref_seq_dif. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + """ + try: + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + if catName != "struct_ref_seq": + return False + # + if not (dataContainer.exists(catName) and dataContainer.exists("struct_ref")): + return False + # + atName = "rcsb_entity_id" + srsObj = dataContainer.getObj(catName) + if not srsObj.hasAttribute(atName): + # srsObj.appendAttribute(atName) + srsObj.appendAttributeExtendRows(atName, defaultValue="?") + # + srObj = dataContainer.getObj("struct_ref") + # + srsdObj = None + if dataContainer.exists("struct_ref_seq_dif"): + srsdObj = dataContainer.getObj("struct_ref_seq_dif") + if not srsdObj.hasAttribute(atName): + # srsdObj.appendAttribute(atName) + srsdObj.appendAttributeExtendRows(atName, defaultValue="?") + + for ii in range(srObj.getRowCount()): + entityId = srObj.getValue("entity_id", ii) + refId = srObj.getValue("id", ii) + # + # Get indices for the target refId. + iRowL = srsObj.selectIndices(refId, "ref_id") + for iRow in iRowL: + srsObj.setValue(entityId, "rcsb_entity_id", iRow) + alignId = srsObj.getValue("align_id", iRow) + # + if srsdObj: + jRowL = srsdObj.selectIndices(alignId, "align_id") + for jRow in jRowL: + srsdObj.setValue(entityId, "rcsb_entity_id", jRow) + + return True + except Exception as e: + logger.exception("%s %s failing with %s", dataContainer.getName(), catName, str(e)) + return False + + def buildEntityPolyInfo(self, dataContainer, catName, **kwargs): + """Build category rcsb_entity_poly_info and supplement category entity_poly. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For example: + loop_ + _rcsb_entity_poly_info.ordinal_id + _rcsb_entity_poly_info.entry_id + _rcsb_entity_poly_info.entity_id + _rcsb_entity_poly_info.comp_id + _rcsb_entity_poly_info.is_modified + _rcsb_entity_poly_info.is_heterogeneous + _rcsb_entity_poly_info.entity_sequence_length + _rcsb_entity_poly_info.chem_comp_count + + 1 1ABC 1 MSE Y N 100 1 + 2 1ABC 1 TRP N N 100 4 + # ... abbreviated ...
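+ + The category also carries _rcsb_entity_poly_info.chem_comp_polymer_fraction, computed as chem_comp_count divided by the enumerated entity sequence length (e.g., an illustrative MSE count of 1 in a 100-residue entity gives 0.01000).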
+ + """ + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + try: + # Exit if source categories are missing + if not (dataContainer.exists("entity_poly") and dataContainer.exists("entry")): + return False + # + # Create the new target category + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + cObj = dataContainer.getObj(catName) + # + cN = "rcsb_entity_monomer_container_identifiers" + if not dataContainer.exists(cN): + dataContainer.append(DataCategory(cN, attributeNameList=self.__dApi.getAttributeNameList(cN))) + idObj = dataContainer.getObj(cN) + + # + epObj = dataContainer.getObj("entity_poly") + for atName in [ + "rcsb_mutation_count", + "rcsb_artifact_monomer_count", + "rcsb_conflict_count", + "rcsb_insertion_count", + "rcsb_deletion_count", + "rcsb_sample_sequence_length", + "rcsb_non_std_monomer_count", + "rcsb_non_std_monomers", + ]: + if not epObj.hasAttribute(atName): + epObj.appendAttribute(atName) + + # + eObj = dataContainer.getObj("entry") + entryId = eObj.getValue("id", 0) + # ------- --------- ------- --------- ------- --------- ------- --------- ------- --------- + seqDifD = self.__commonU.getEntitySequenceFeatureCounts(dataContainer) + eD = self.__commonU.getPolymerEntityMonomerCounts(dataContainer) + elD = self.__commonU.getPolymerEntityLengthsEnumerated(dataContainer) + modMonD = self.__commonU.getPolymerEntityModifiedMonomers(dataContainer) + # + monDict3 = self.__commonU.monDict3 + ii = 0 + for entityId, cD in eD.items(): + for compId, chemCompCount in cD.items(): + modFlag = "N" if compId in monDict3 else "Y" + cObj.setValue(ii + 1, "ordinal_id", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(compId, "comp_id", ii) + cObj.setValue(chemCompCount, "chem_comp_count", ii) + cObj.setValue(round(float(chemCompCount) / float(elD[entityId]), 5), "chem_comp_polymer_fraction", ii) + cObj.setValue(modFlag, "is_modified", ii) + # + idObj.setValue(ii + 1, "ordinal_id", ii) + idObj.setValue(entryId, "entry_id", ii) + idObj.setValue(entityId, "entity_id", ii) + idObj.setValue(compId, "comp_id", ii) + ii += 1 + # + for ii in range(epObj.getRowCount()): + entityId = epObj.getValue("entity_id", ii) + mutations = seqDifD[entityId]["mutation"] if entityId in seqDifD else 0 + conflicts = seqDifD[entityId]["conflict"] if entityId in seqDifD else 0 + insertions = seqDifD[entityId]["insertion"] if entityId in seqDifD else 0 + deletions = seqDifD[entityId]["deletion"] if entityId in seqDifD else 0 + artifacts = seqDifD[entityId]["artifact"] if entityId in seqDifD else 0 + seqLen = elD[entityId] if entityId in elD else None + epObj.setValue(mutations, "rcsb_mutation_count", ii) + epObj.setValue(artifacts, "rcsb_artifact_monomer_count", ii) + epObj.setValue(conflicts, "rcsb_conflict_count", ii) + epObj.setValue(insertions, "rcsb_insertion_count", ii) + epObj.setValue(deletions, "rcsb_deletion_count", ii) + if seqLen is not None: + epObj.setValue(seqLen, "rcsb_sample_sequence_length", ii) + # + numMod = len(modMonD[entityId]) + uModL = ",".join(modMonD[entityId]) if numMod else "?" 
+ epObj.setValue(numMod, "rcsb_non_std_monomer_count", ii) + epObj.setValue(uModL, "rcsb_non_std_monomers", ii) + + return True + except Exception as e: + logger.exception("%s %s failing with %s", dataContainer.getName(), catName, str(e)) + return False + + def addBranchedEntityComponentCounts(self, dataContainer, catName, atName, **kwargs): + """Add the total number of branched components in the branched entity. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): target category name + atName (str): target attribute name + + Returns: + bool: True for success or False otherwise + """ + try: + logger.debug("Starting with %r %r %r %r", dataContainer.getName(), catName, atName, kwargs) + if not (dataContainer.exists("pdbx_entity_branch") and dataContainer.exists("pdbx_entity_branch_list")): + return False + # + ebObj = dataContainer.getObj("pdbx_entity_branch") + eblObj = dataContainer.getObj("pdbx_entity_branch_list") + # + if not ebObj.hasAttribute(atName): + ebObj.appendAttribute(atName) + + for ii in range(ebObj.getRowCount()): + entityId = ebObj.getValue("entity_id", ii) + tL = eblObj.selectValuesWhere("entity_id", entityId, "entity_id") + ebObj.setValue(len(tL), atName, ii) + + return True + except Exception as e: + logger.exception("For %s %s failing with %s", catName, atName, str(e)) + return False + + def addEntityMisc(self, dataContainer, catName, atName, **kwargs): + """Add consolidated enzyme classification and macromolecule names to the entity category. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For instance, add: + + _entity.rcsb_macromolecular_names_combined <<< Dictionary target + + _entity.rcsb_ec_lineage_name + _entity.rcsb_ec_lineage_id + _entity.rcsb_ec_lineage_depth + + """ + try: + if not (dataContainer.exists("entry") and dataContainer.exists("entity")): + return False + # + if catName == "entity" and atName in ["rcsb_ec_lineage_name", "rcsb_ec_lineage_id", "rcsb_ec_lineage_depth"]: + return True + # + eObj = dataContainer.getObj("entity") + atList = [ + "rcsb_ec_lineage_depth", + "rcsb_ec_lineage_id", + "rcsb_ec_lineage_name", + "rcsb_macromolecular_names_combined_name", + "rcsb_macromolecular_names_combined_provenance_source", + "rcsb_macromolecular_names_combined_provenance_code", + "rcsb_enzyme_class_combined_ec", + "rcsb_enzyme_class_combined_provenance_source", + "rcsb_enzyme_class_combined_depth", + ] + for at in atList: + if not eObj.hasAttribute(at): + eObj.appendAttribute(at) + + hasEc = eObj.hasAttribute("pdbx_ec") + # + rP = kwargs.get("resourceProvider") + ecU = None + if hasEc: + ecU = rP.getResource("EnzymeProvider instance") if rP else None + # + ncObj = None + if dataContainer.exists("entity_name_com"): + ncObj = dataContainer.getObj("entity_name_com") + # get any BIRD assigned names - + birdFeatureD = self.__getBirdFeatures(dataContainer) + birdNameD = {} + for (entityId, _, _, filteredFeature), fName in birdFeatureD.items(): + if filteredFeature == "BIRD_MOLECULE_NAME" and fName: + birdNameD.setdefault(entityId, []).append(fName) + + for ii in range(eObj.getRowCount()): + entityId = eObj.getValue("id", ii) + entityType = eObj.getValue("type", ii) + # + eObj.setValue("?", "rcsb_ec_lineage_depth", ii) + eObj.setValue("?", "rcsb_ec_lineage_id", ii) + eObj.setValue("?", "rcsb_ec_lineage_name", ii) + eObj.setValue("?", "rcsb_macromolecular_names_combined_name", ii) + eObj.setValue("?",
"rcsb_macromolecular_names_combined_provenance_source", ii) + eObj.setValue("?", "rcsb_macromolecular_names_combined_provenance_code", ii) + eObj.setValue("?", "rcsb_enzyme_class_combined_ec", ii) + eObj.setValue("?", "rcsb_enzyme_class_combined_provenance_source", ii) + eObj.setValue("?", "rcsb_enzyme_class_combined_depth", ii) + # + if entityType not in ["polymer", "branched"]: + continue + # + # -------------------------------------------------------------------------- + # PDB assigned names + nameL = [] + sourceL = [] + provCodeL = [] + nmL = str(eObj.getValue("pdbx_description", ii)).split(",") + nmL = self.__cleanupCsv(nmL) + nmL = [tV.strip() for tV in nmL if len(tV) > 3] + nmLookUpD = {} + for nm in nmL: + if nm.upper() in nmLookUpD: + continue + nmLookUpD[nm.upper()] = True + nameL.append(nm) + sourceL.append("PDB Preferred Name") + provCodeL.append("ECO:0000304") + # + # PDB common names/synonyms + logger.debug("%s ii %d nmL %r", dataContainer.getName(), ii, nmL) + # + if ncObj: + ncL = [] + tL = ncObj.selectValuesWhere("name", entityId, "entity_id") + logger.debug("%s ii %d tL %r", dataContainer.getName(), ii, tL) + for tV in tL: + tff = tV.split(",") + ncL.extend(tff) + ncL = self.__cleanupCsv(ncL) + ncL = [tV.strip() for tV in ncL if len(tV) > 3] + for nc in ncL: + if nc.upper() in nmLookUpD: + continue + nmLookUpD[nc.upper()] = True + nameL.append(nc) + sourceL.append("PDB Synonym") + provCodeL.append("ECO:0000303") + logger.debug("%s ii %d ncL %r", dataContainer.getName(), ii, ncL) + # + if entityId in birdNameD: + for nm in birdNameD[entityId]: + if nm.upper() in nmLookUpD: + continue + nmLookUpD[nm.upper()] = True + nameL.append(nm) + sourceL.append("PDB BIRD Name") + provCodeL.append("ECO:0000303") + # + if nameL: + eObj.setValue(";".join(nameL), "rcsb_macromolecular_names_combined_name", ii) + eObj.setValue(";".join(sourceL), "rcsb_macromolecular_names_combined_provenance_source", ii) + eObj.setValue(";".join(provCodeL), "rcsb_macromolecular_names_combined_provenance_code", ii) + + # -------------------------------------------------------------------------- + linL = [] + ecIdUpdL = [] + ecDepthUpdL = [] + ecV = eObj.getValueOrDefault("pdbx_ec", ii, defaultValue=None) + if ecV: + ecIdL = ecV.split(",") if ecV else [] + if ecIdL: + ecIdL = list(OrderedDict.fromkeys(ecIdL)) + for tId in ecIdL: + ecId = ecU.normalize(tId) + if not ecU.exists(ecId): + continue + # tL = ecU.getLineage(ecId) if ecId and len(ecId) > 7 else None + tL = ecU.getLineage(ecId) + if tL: + linL.extend(tL) + ecIdUpdL.append(ecId) + ecDepthUpdL.append(str(ecId.count(".") + 1)) + + if linL: + eObj.setValue(";".join([str(tup[0]) for tup in linL]), "rcsb_ec_lineage_depth", ii) + eObj.setValue(";".join([str(tup[1]) for tup in linL]), "rcsb_ec_lineage_id", ii) + eObj.setValue(";".join([tup[2] for tup in linL]), "rcsb_ec_lineage_name", ii) + if ecIdUpdL: + eObj.setValue(",".join(ecIdUpdL), "pdbx_ec", ii) + eObj.setValue(";".join(ecIdUpdL), "rcsb_enzyme_class_combined_ec", ii) + eObj.setValue(";".join(ecDepthUpdL), "rcsb_enzyme_class_combined_depth", ii) + eObj.setValue(";".join(["PDB Primary Data" for _ in ecIdUpdL]), "rcsb_enzyme_class_combined_provenance_source", ii) + else: + eObj.setValue("?", "pdbx_ec", ii) + eObj.setValue("?", "rcsb_enzyme_class_combined_ec", ii) + eObj.setValue("?", "rcsb_enzyme_class_combined_provenance_source", ii) + eObj.setValue("?", "rcsb_enzyme_class_combined_depth", ii) + if ecIdL: + logger.debug("%s obsolete or undefined EC class detected %r", dataContainer.getName(), ecV) + 
return True + except Exception as e: + logger.exception("For %s %s failing with %s", catName, atName, str(e)) + return False + + def __cleanupCsv(self, tL): + """Ad hoc cleanup function for comma separated lists with embedded punctuation""" + rL = [] + try: + key_paths = functools.cmp_to_key(cmpElements) + groups = [",".join(grp) for key, grp in itertools.groupby(tL, key_paths)] + rL = list(OrderedDict.fromkeys(groups)) + except Exception: + pass + return rL + + def __filterCaseDuplicates(self, inpSL): + oL = [] + try: + lookUpD = {} + for inpS in inpSL: + if inpS.upper() in lookUpD: + continue + lookUpD[inpS.upper()] = True + oL.append(inpS) + except Exception: + return inpSL + + return oL + + def __getAttribList(self, sObj, atTupL): + atL = [] + atSL = [] + if sObj: + for (atS, at) in atTupL: + if sObj.hasAttribute(atS): + atL.append(at) + atSL.append(atS) + return atSL, atL + + def __normalizeCsvToList(self, entryId, colL, separator=","): + """Normalize a row containing some character delimited fields. + + Expand a list of uneven lists into a uniform list of lists. + Only two list lengths are logically supported: one and a second + maximum length. + + returns: list of expanded rows or the original input. + + """ + tcL = [] + countL = [] + for col in colL: + cL = [t.strip() for t in col.split(separator)] + tcL.append(cL) + countL.append(len(cL)) + # + tL = list(OrderedDict.fromkeys(countL)) + if len(tL) == 1 and tL[0] == 1: + return [colL] + # + orig = False + # Report pathological cases ... + if orig: + if (len(tL) > 2) or (tL[0] != 1 and len(tL) == 2): + logger.error("%s integrated source data inconsistent %r colL", entryId, colL) + return [colL] + # + # Expand the columns with uniform length + # + + if orig: + icL = [] + maxL = tL[1] + for tc in tcL: + if len(tc) == 1: + tc = tc * maxL + icL.append(tc) + else: + icL = [] + maxL = tL[1] + for tc in tcL: + if len(tc) == 1: + tc = tc * maxL + if len(tc) < maxL: + for _ in range(maxL - len(tc)): + tc.append("?") + icL.append(tc[:maxL]) + + # + logger.debug("%s icL %r", entryId, icL) + # Convert back to a row list + # + iRow = 0 + rL = [] + for iRow in range(maxL): + row = [] + for ic in icL: + row.append(ic[iRow]) + rL.append(row) + + return rL + + def __stripWhiteSpace(self, val): + """Remove all white space from the input value.""" + if val is None: + return val + return self.__wsPattern.sub("", val) + + # + def __getTargetComponentFeatures(self, dataContainer): + """Get targeted components - + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {(entityId, compId, "SUBJECT_OF_INVESTIGATION"): True} + + """ + tcD = {} + try: + if not dataContainer.exists("pdbx_entity_nonpoly"): + return tcD + ccTargets = self.__commonU.getTargetComponents(dataContainer) + if dataContainer.exists("pdbx_entity_nonpoly"): + npObj = dataContainer.getObj("pdbx_entity_nonpoly") + for ii in range(npObj.getRowCount()): + entityId = npObj.getValue("entity_id", ii) + compId = npObj.getValue("comp_id", ii) + if compId in ccTargets: + tcD[(entityId, compId, "SUBJECT_OF_INVESTIGATION")] = True + except Exception as e: + logger.exception("Failing for %s with %s", dataContainer.getName(), str(e)) + return tcD + + # + def __getBirdFeatures(self, dataContainer): + """Get type, class, and name BIRD annotations - + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + dict: {(entityId, compId, prdId, "BIRD_MOLECULE_TYPE|BIRD_MOLECULE_CLASS|BIRD_MOLECULE_NAME"): value} + + + Example: + _pdbx_molecule_features.prd_id PRD_002214 + _pdbx_molecule_features.name 'N-[(5-METHYLISOXAZOL-3-Y ...'
+ _pdbx_molecule_features.type Peptide-like + _pdbx_molecule_features.class Inhibitor + _pdbx_molecule_features.details ? + # + _pdbx_molecule.instance_id 1 + _pdbx_molecule.prd_id PRD_002214 + _pdbx_molecule.asym_id B + _pdbx_molecule.rcsb_entity_id 2 + _pdbx_molecule.rcsb_comp_id . + # + + """ + bD = {} + if not (dataContainer.exists("pdbx_molecule_features") and dataContainer.exists("pdbx_molecule")): + return bD + + try: + asymIdEntityIdD = self.__commonU.getInstanceEntityMap(dataContainer) + eD = {} + if dataContainer.exists("pdbx_entity_nonpoly"): + npObj = dataContainer.getObj("pdbx_entity_nonpoly") + for ii in range(npObj.getRowCount()): + entityId = npObj.getValue("entity_id", ii) + compId = npObj.getValue("comp_id", ii) + eD[entityId] = compId + + pfObj = dataContainer.getObj("pdbx_molecule_features") + pfD = {} + for ii in range(pfObj.getRowCount()): + prdId = pfObj.getValue("prd_id", ii) + prdType = pfObj.getValueOrDefault("type", ii, defaultValue=None) + prdClass = pfObj.getValueOrDefault("class", ii, defaultValue=None) + prdName = pfObj.getValueOrDefault("name", ii, defaultValue=None) + pfD[prdId] = (prdType, prdClass, prdName) + + pObj = dataContainer.getObj("pdbx_molecule") + bD = {} + for ii in range(pObj.getRowCount()): + asymId = pObj.getValue("asym_id", ii) + prdId = pObj.getValue("prd_id", ii) + entityId = asymIdEntityIdD[asymId] + compId = eD[entityId] if entityId in eD else None + if pfD[prdId][0]: + bD[(entityId, compId, prdId, "BIRD_MOLECULE_TYPE")] = pfD[prdId][0] + if pfD[prdId][1]: + bD[(entityId, compId, prdId, "BIRD_MOLECULE_CLASS")] = pfD[prdId][1] + if pfD[prdId][2]: + bD[(entityId, compId, prdId, "BIRD_MOLECULE_NAME")] = pfD[prdId][2] + + except Exception as e: + logger.exception("Failing for %s with %s", dataContainer.getName(), str(e)) + return bD + + def __getEntityFeatureTypes(self, eType): + eTupL = [] + if eType == "polymer": + eTupL = self.__dApi.getEnumListWithDetail("rcsb_polymer_entity_feature_summary", "type") + elif eType == "non-polymer": + eTupL = self.__dApi.getEnumListWithDetail("rcsb_nonpolymer_entity_feature_summary", "type") + elif eType == "branched": + eTupL = self.__dApi.getEnumListWithDetail("rcsb_branched_entity_feature_summary", "type") + # + fTypeL = sorted([tup[0] for tup in eTupL]) + return fTypeL + + def buildEntityFeatureSummary(self, dataContainer, catName, **kwargs): + """Build category rcsb_entity_feature_summary (UPDATED) + + Example: + + loop_ + _rcsb_entity_feature_summary.ordinal + _rcsb_entity_feature_summary.entry_id + _rcsb_entity_feature_summary.entity_id + _rcsb_entity_feature_summary.type + _rcsb_entity_feature_summary.count + _rcsb_entity_feature_summary.coverage + # ...
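+ + For instance (illustrative values only): + + 1 1ABC 1 mutation 2 0.02000 + 2 1ABC 1 artifact 1 0.01000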
+ """ + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + try: + if catName != "rcsb_entity_feature_summary": + return False + if not dataContainer.exists("rcsb_entity_feature") and not dataContainer.exists("entry"): + return False + + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + eObj = dataContainer.getObj("entry") + entryId = eObj.getValue("id", 0) + # + sObj = dataContainer.getObj(catName) + fObj = dataContainer.getObj("rcsb_entity_feature") + # + entityPolymerLengthD = self.__commonU.getPolymerEntityLengthsEnumerated(dataContainer) + eTypeD = self.__commonU.getEntityTypes(dataContainer) + + fCountD = OrderedDict() + fMonomerCountD = OrderedDict() + for ii in range(fObj.getRowCount()): + entityId = fObj.getValue("entity_id", ii) + # + fType = fObj.getValue("type", ii) + fId = fObj.getValue("feature_id", ii) + fCountD.setdefault(entityId, {}).setdefault(fType, set()).add(fId) + + # + tbegS = fObj.getValueOrDefault("feature_positions_beg_seq_id", ii, defaultValue=None) + tendS = fObj.getValueOrDefault("feature_positions_end_seq_id", ii, defaultValue=None) + if fObj.hasAttribute("feature_positions_beg_seq_id") and tbegS is not None and fObj.hasAttribute("feature_positions_end_seq_id") and tendS is not None: + begSeqIdL = str(fObj.getValue("feature_positions_beg_seq_id", ii)).split(";") + endSeqIdL = str(fObj.getValue("feature_positions_end_seq_id", ii)).split(";") + monCount = 0 + for begSeqId, endSeqId in zip(begSeqIdL, endSeqIdL): + monCount += abs(int(endSeqId) - int(begSeqId) + 1) + fMonomerCountD.setdefault(entityId, {}).setdefault(fType, []).append(monCount) + elif fObj.hasAttribute("feature_positions_beg_seq_id") and tbegS: + seqIdL = str(fObj.getValue("feature_positions_beg_seq_id", ii)).split(";") + fMonomerCountD.setdefault(entityId, {}).setdefault(fType, []).append(len(seqIdL)) + # + ii = 0 + for entityId, eType in eTypeD.items(): + fTypes = self.__getEntityFeatureTypes(eType) + for fType in fTypes: + sObj.setValue(ii + 1, "ordinal", ii) + sObj.setValue(entryId, "entry_id", ii) + sObj.setValue(entityId, "entity_id", ii) + sObj.setValue(fType, "type", ii) + + minL = maxL = None + fracC = 0.0 + fCount = 0 + if entityId in fCountD and fType in fCountD[entityId]: + fCount = len(fCountD[entityId][fType]) + + if entityId in fMonomerCountD and fType in fMonomerCountD[entityId] and entityId in entityPolymerLengthD: + fracC = float(sum(fMonomerCountD[entityId][fType])) / float(entityPolymerLengthD[entityId]) + # + if fType in ["artifact"] and entityId in fMonomerCountD and fType in fMonomerCountD[entityId]: + minL = min(fMonomerCountD[entityId][fType]) + maxL = max(fMonomerCountD[entityId][fType]) + + sObj.setValue(round(fracC, 5), "coverage", ii) + sObj.setValue(fCount, "count", ii) + if minL is not None: + sObj.setValue(minL, "minimum_length", ii) + sObj.setValue(maxL, "maximum_length", ii) + # + ii += 1 + except Exception as e: + logger.exception("Failing with %s", str(e)) + return True + + def buildEntityFeatures(self, dataContainer, catName, **kwargs): + """Build category rcsb_entity_feature ... 
+ + Example: + loop_ + _rcsb_entity_feature.ordinal + _rcsb_entity_feature.entry_id + _rcsb_entity_feature.entity_id + _rcsb_entity_feature.feature_id + _rcsb_entity_feature.type + _rcsb_entity_feature.name + _rcsb_entity_feature.description + _rcsb_entity_feature.reference_scheme + _rcsb_entity_feature.provenance_source + _rcsb_entity_feature.assignment_version + _rcsb_entity_feature.feature_positions_beg_seq_id + _rcsb_entity_feature.feature_positions_end_seq_id + _rcsb_entity_feature.feature_positions_value + + """ + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + try: + if catName != "rcsb_entity_feature": + return False + # Exit if source categories are missing + if not dataContainer.exists("entry"): + return False + # + # Create the new target category + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + cObj = dataContainer.getObj(catName) + # + # rP = kwargs.get("resourceProvider") + + eObj = dataContainer.getObj("entry") + entryId = eObj.getValue("id", 0) + # + # --------------- + ii = cObj.getRowCount() + jj = 1 + # + targetFeatureD = self.__getTargetComponentFeatures(dataContainer) + for (entityId, compId, filteredFeature) in targetFeatureD: + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(compId, "comp_id", ii) + cObj.setValue(filteredFeature, "type", ii) + cObj.setValue("entity_feature_%d" % jj, "feature_id", ii) + details = "Ligand targeted in this investigation" + cObj.setValue(details, "description", ii) + cObj.setValue(compId, "name", ii) + cObj.setValue("PDB", "provenance_source", ii) + cObj.setValue("V1.0", "assignment_version", ii) + # + jj += 1 + ii += 1 + # + # BIRD type and class + skipBird = True + if not skipBird: + birdFeatureD = self.__getBirdFeatures(dataContainer) + for (entityId, compId, prdId, filteredFeature), fName in birdFeatureD.items(): + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(compId, "comp_id", ii) + cObj.setValue(filteredFeature, "type", ii) + cObj.setValue("entity_feature_%d" % jj, "feature_id", ii) + if compId: + details = "Non-polymer BIRD %s chemical component %s" % (prdId, compId) + else: + details = "Polymer BIRD %s entity %s" % (prdId, entityId) + cObj.setValue(details, "description", ii) + # + cObj.setValue(fName, "name", ii) + cObj.setValue("PDB", "provenance_source", ii) + cObj.setValue("V1.0", "assignment_version", ii) + # + jj += 1 + ii += 1 + # + # Monomer modifications + jj = 1 + modMonomerFeatures = self.__commonU.getPolymerModifiedMonomerFeatures(dataContainer) + for (entityId, seqId, compId, filteredFeature) in modMonomerFeatures: + parentCompId = self.__ccP.getParentComponent(compId) + + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(filteredFeature, "type", ii) + cObj.setValue("monomer_feature_%d" % jj, "feature_id", ii) + if parentCompId: + details = "Parent monomer %s" % parentCompId + cObj.setValue(details, "name", ii) + # + cObj.setValue(compId, "feature_positions_beg_comp_id", ii) + cObj.setValue(seqId, "feature_positions_beg_seq_id", ii) + # + cObj.setValue("PDB entity", "reference_scheme", ii) + cObj.setValue("PDB", "provenance_source", ii) + cObj.setValue("V1.0", "assignment_version", ii) + # + jj += 1 + ii += 1 + # + # + 
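# seqMonomerFeatures maps (entityId, seqId, compId, feature type) -> {details, ...}; only "mutation" features are materialized below (illustrative key: ("1", 42, "ALA", "mutation")). +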
+            seqMonomerFeatures = self.__commonU.getEntitySequenceMonomerFeatures(dataContainer)
+            for (entityId, seqId, compId, filteredFeature), sDetails in seqMonomerFeatures.items():
+                if filteredFeature not in ["mutation"]:
+                    continue
+                cObj.setValue(ii + 1, "ordinal", ii)
+                cObj.setValue(entryId, "entry_id", ii)
+                cObj.setValue(entityId, "entity_id", ii)
+                cObj.setValue(filteredFeature, "type", ii)
+                cObj.setValue("monomer_feature_%d" % jj, "feature_id", ii)
+                details = ",".join(list(sDetails))
+                cObj.setValue(details, "name", ii)
+                #
+                cObj.setValue(compId, "feature_positions_beg_comp_id", ii)
+                cObj.setValue(seqId, "feature_positions_beg_seq_id", ii)
+                #
+                cObj.setValue("PDB entity", "reference_scheme", ii)
+                cObj.setValue("PDB", "provenance_source", ii)
+                cObj.setValue("V1.0", "assignment_version", ii)
+                #
+                jj += 1
+                ii += 1
+            #
+            jj = 1
+            seqRangeFeatures = self.__commonU.getEntitySequenceRangeFeatures(dataContainer)
+            for (entityId, begSeqId, endSeqId, filteredFeature), sDetails in seqRangeFeatures.items():
+                if filteredFeature not in ["artifact"]:
+                    continue
+                cObj.setValue(ii + 1, "ordinal", ii)
+                cObj.setValue(entryId, "entry_id", ii)
+                cObj.setValue(entityId, "entity_id", ii)
+                cObj.setValue(filteredFeature, "type", ii)
+                cObj.setValue("range_feature_%d" % jj, "feature_id", ii)
+                details = ",".join(list(sDetails))
+                cObj.setValue(details, "name", ii)
+                #
+                cObj.setValue(begSeqId, "feature_positions_beg_seq_id", ii)
+                cObj.setValue(endSeqId, "feature_positions_end_seq_id", ii)
+                #
+                cObj.setValue("PDB entity", "reference_scheme", ii)
+                cObj.setValue("PDB", "provenance_source", ii)
+                cObj.setValue("V1.0", "assignment_version", ii)
+                #
+                jj += 1
+                ii += 1
+            return True
+        except Exception as e:
+            logger.exception("%s %s failing with %s", dataContainer.getName(), catName, str(e))
+        return False
+
+    def addTypedEntityCategories(self, dataContainer, blockName, **kwargs):
+        """Slice common entity categories into type specific entity categories.
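+
+        A minimal usage sketch (the instance name `helper` is illustrative):
+
+            ok = helper.addTypedEntityCategories(dataContainer, dataContainer.getName())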
+
+        Args:
+            dataContainer (object): mmcif.api.DataContainer object instance
+            blockName (str): Block name
+
+        Returns:
+            bool: True for success or False otherwise
+
+        """
+        logger.debug("Starting with %r %r %r", dataContainer.getName(), blockName, kwargs)
+        try:
+            if not (dataContainer.exists("entry") and dataContainer.exists("entity")):
+                return False
+            if dataContainer.exists("rcsb_polymer_entity") or dataContainer.exists("rcsb_nonpolymer_entity") or dataContainer.exists("rcsb_branched_entity"):
+                return True
+            # -----
+            categoryMapD = {
+                "polymer": [
+                    ("entity", "rcsb_polymer_entity", "id"),
+                    ("entity_keywords", "rcsb_polymer_entity_keywords", "entity_id"),
+                    ("entity_name_com", "rcsb_polymer_entity_name_com", "entity_id"),
+                    ("entity_name_sys", "rcsb_polymer_entity_name_sys", "entity_id"),
+                    ("rcsb_entity_container_identifiers", "rcsb_polymer_entity_container_identifiers", "entity_id"),
+                    ("rcsb_entity_instance_container_identifiers", "rcsb_polymer_entity_instance_container_identifiers", "entity_id"),
+                ],
+                "non-polymer": [
+                    ("entity", "rcsb_nonpolymer_entity", "id"),
+                    ("entity_keywords", "rcsb_nonpolymer_entity_keywords", "entity_id"),
+                    ("entity_name_com", "rcsb_nonpolymer_entity_name_com", "entity_id"),
+                    ("entity_name_sys", "rcsb_nonpolymer_entity_name_sys", "entity_id"),
+                    ("rcsb_entity_container_identifiers", "rcsb_nonpolymer_entity_container_identifiers", "entity_id"),
+                    ("rcsb_entity_instance_container_identifiers", "rcsb_nonpolymer_entity_instance_container_identifiers", "entity_id"),
+                ],
+                "branched": [
+                    ("entity", "rcsb_branched_entity", "id"),
+                    ("entity_keywords", "rcsb_branched_entity_keywords", "entity_id"),
+                    ("entity_name_com", "rcsb_branched_entity_name_com", "entity_id"),
+                    ("entity_name_sys", "rcsb_branched_entity_name_sys", "entity_id"),
+                    ("rcsb_entity_container_identifiers", "rcsb_branched_entity_container_identifiers", "entity_id"),
+                    ("rcsb_entity_instance_container_identifiers", "rcsb_branched_entity_instance_container_identifiers", "entity_id"),
+                ],
+            }
+            ok = self.__sliceCategoriesByEntityType(dataContainer, categoryMapD)
+            return ok
+        except Exception as e:
+            logger.exception("%s for %s failing with %s", dataContainer.getName(), blockName, str(e))
+        return False
+
+    def addTypedEntityFeatureCategories(self, dataContainer, blockName, **kwargs):
+        """Slice common entity feature categories into type specific entity feature categories.
+
+        Args:
+            dataContainer (object): mmcif.api.DataContainer object instance
+            blockName (str): Block name
+
+        Returns:
+            bool: True for success or False otherwise
+
+        """
+        logger.debug("Starting with %r %r %r", dataContainer.getName(), blockName, kwargs)
+        try:
+            if not (dataContainer.exists("entry") and dataContainer.exists("entity")):
+                return False
+            if (
+                dataContainer.exists("rcsb_polymer_entity_feature")
+                or dataContainer.exists("rcsb_nonpolymer_entity_feature")
+                or dataContainer.exists("rcsb_branched_entity_feature")
+            ):
+                return True
+            # -----
+            categoryMapD = {
+                "polymer": [
+                    ("rcsb_entity_feature", "rcsb_polymer_entity_feature", "entity_id"),
+                    ("rcsb_entity_feature_summary", "rcsb_polymer_entity_feature_summary", "entity_id"),
+                    ("rcsb_entity_instance_feature", "rcsb_polymer_instance_feature", "entity_id"),
+                    ("rcsb_entity_instance_feature_summary", "rcsb_polymer_instance_feature_summary", "entity_id"),
+                    ("rcsb_entity_instance_validation_feature", "rcsb_polymer_instance_feature", "entity_id"),
+                    ("rcsb_entity_instance_validation_feature_summary", "rcsb_polymer_instance_feature_summary", "entity_id"),
+                    ("rcsb_struct_conn", "rcsb_polymer_struct_conn", "entity_id"),
+                    ("rcsb_entity_annotation", "rcsb_polymer_entity_annotation", "entity_id"),
+                    ("rcsb_entity_instance_annotation", "rcsb_polymer_instance_annotation", "entity_id"),
+                ],
+                "non-polymer": [
+                    ("rcsb_entity_feature", "rcsb_nonpolymer_entity_feature", "entity_id"),
+                    ("rcsb_entity_feature_summary", "rcsb_nonpolymer_entity_feature_summary", "entity_id"),
+                    ("rcsb_entity_instance_feature", "rcsb_nonpolymer_instance_feature", "entity_id"),
+                    ("rcsb_entity_instance_feature_summary", "rcsb_nonpolymer_instance_feature_summary", "entity_id"),
+                    ("rcsb_entity_instance_validation_feature", "rcsb_nonpolymer_instance_feature", "entity_id"),
+                    ("rcsb_entity_instance_validation_feature_summary", "rcsb_nonpolymer_instance_feature_summary", "entity_id"),
+                    ("rcsb_struct_conn", "rcsb_nonpolymer_struct_conn", "entity_id"),
+                    ("rcsb_entity_annotation", "rcsb_nonpolymer_entity_annotation", "entity_id"),
+                    ("rcsb_entity_instance_annotation", "rcsb_nonpolymer_instance_annotation", "entity_id"),
+                ],
+                "branched": [
+                    ("rcsb_entity_feature", "rcsb_branched_entity_feature", "entity_id"),
+                    ("rcsb_entity_feature_summary", "rcsb_branched_entity_feature_summary", "entity_id"),
+                    ("rcsb_entity_instance_feature", "rcsb_branched_instance_feature", "entity_id"),
+                    ("rcsb_entity_instance_feature_summary", "rcsb_branched_instance_feature_summary", "entity_id"),
+                    ("rcsb_entity_instance_validation_feature", "rcsb_branched_instance_feature", "entity_id"),
+                    ("rcsb_entity_instance_validation_feature_summary", "rcsb_branched_instance_feature_summary", "entity_id"),
+                    ("rcsb_struct_conn", "rcsb_branched_struct_conn", "entity_id"),
+                    ("rcsb_entity_annotation", "rcsb_branched_entity_annotation", "entity_id"),
+                    ("rcsb_entity_instance_annotation", "rcsb_branched_instance_annotation", "entity_id"),
+                ],
+            }
+            ok = self.__sliceCategoriesByEntityType(dataContainer, categoryMapD)
+            return ok
+        except Exception as e:
+            logger.exception("%s for %s failing with %s", dataContainer.getName(), blockName, str(e))
+        return False
+
+    def __sliceCategoriesByEntityType(self, dataContainer, categoryMapD):
+        """Slice common entity categories into type specific entity categories.
+
+        Args:
+            dataContainer (object): mmcif.api.DataContainer object instance
+            categoryMapD (dict): {<entity type>: [(<source category>, <target category>, <entity id key>), ...], ...}
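+
+                An illustrative fragment (not the full map used above):
+                {"polymer": [("entity", "rcsb_polymer_entity", "id")]} copies rows
+                of "entity" describing polymer entities into "rcsb_polymer_entity".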
+
+        Returns:
+            bool: True for success or False otherwise
+
+        """
+        logger.debug("Starting with %r", dataContainer.getName())
+        try:
+            if not (dataContainer.exists("entry") and dataContainer.exists("entity")):
+                return False
+            eObj = dataContainer.getObj("entity")
+            eCount = eObj.getRowCount()
+            eTypeD = {eObj.getValue("id", ii): eObj.getValue("type", ii) for ii in range(eCount) if eObj.getValue("type", ii)}
+            eTypes = list(set(eTypeD.values()))
+            logger.debug("%s entity types %r map %r", dataContainer.getName(), eTypes, eTypeD)
+            for eType, catTupL in categoryMapD.items():
+                if eType in eTypes:
+                    # create new categories as needed
+                    for srcCatN, dstCatN, entityIdKey in catTupL:
+                        if dataContainer.exists(srcCatN):
+                            if not dataContainer.exists(dstCatN):
+                                dataContainer.append(DataCategory(dstCatN, attributeNameList=self.__dApi.getAttributeNameList(dstCatN)))
+                            srcObj = dataContainer.getObj(srcCatN)
+                            dstObj = dataContainer.getObj(dstCatN)
+                            jj = dstObj.getRowCount()
+                            for ii in range(srcObj.getRowCount()):
+                                entityId = srcObj.getValue(entityIdKey, ii)
+                                logger.debug("%s srcCatN %s row %d key %r entityId %r", dataContainer.getName(), srcCatN, ii, entityIdKey, entityId)
+                                if eTypeD[entityId] != eType:
+                                    continue
+                                for dstAtName in dstObj.getAttributeList():
+                                    srcAtName = entityIdKey if dstAtName == "entity_id" else dstAtName
+                                    logger.debug(
+                                        "%s entityId %r srcCatN %r srcAtName %s dstCatN %s dstAtName %s", dataContainer.getName(), entityId, srcCatN, srcAtName, dstCatN, dstAtName
+                                    )
+                                    if srcObj.hasAttribute(srcAtName):
+                                        tS = srcObj.getValue(srcAtName, ii)
+                                        logger.debug("%s entityId %r srcCatN %r srcAtName %s value %s", dataContainer.getName(), entityId, srcCatN, srcAtName, tS)
+                                        if srcAtName in ["formula_weight"]:
+                                            # dalton to kiloDalton
+                                            try:
+                                                tV = float(tS) / 1000.0
+                                                tS = "%.3f" % tV
+                                            except Exception:
+                                                tS = "?"
+                                        if dstAtName in ["ordinal"]:
+                                            tS = jj + 1
+                                        _ = dstObj.setValue(tS, dstAtName, jj)
+                                    else:
+                                        logger.debug("Missing srcCatN %s srcAtName %s", srcCatN, srcAtName)
+                                        _ = dstObj.setValue("?", dstAtName, jj)
+                                jj += 1
+            return True
+        except Exception as e:
+            logger.exception("%s failing with %s", dataContainer.getName(), str(e))
+        return False
+
+    #
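+    # Note on the slice helper above (values illustrative): "formula_weight" is
+    # copied with a Da to kDa conversion, so "18012.3" becomes "18.012", and
+    # unparsable values fall back to "?".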
+    def buildEntityAnnotations(self, dataContainer, catName, **kwargs):
+        """Build category rcsb_entity_annotation ...
+
+        Example:
+            loop_
+            _rcsb_entity_annotation.ordinal
+            _rcsb_entity_annotation.entry_id
+            _rcsb_entity_annotation.entity_id
+            _rcsb_entity_annotation.annotation_id
+            _rcsb_entity_annotation.type
+            _rcsb_entity_annotation.name
+            _rcsb_entity_annotation.description
+            _rcsb_entity_annotation.annotation_lineage_id
+            _rcsb_entity_annotation.annotation_lineage_name
+            _rcsb_entity_annotation.annotation_lineage_depth
+            _rcsb_entity_annotation.provenance_source
+            _rcsb_entity_annotation.assignment_version
+
+        """
+        logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs)
+        try:
+            if catName != "rcsb_entity_annotation":
+                return False
+            # Exit if source categories are missing
+            if not dataContainer.exists("entry"):
+                return False
+            #
+            # Create the new target category
+            if not dataContainer.exists(catName):
+                dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName)))
+            cObj = dataContainer.getObj(catName)
+            #
+            eObj = dataContainer.getObj("entry")
+            entryId = eObj.getValue("id", 0)
+            #
+            # ---------------
+            ii = cObj.getRowCount()
+            jj = 1
+            #
+            targetFeatureD = self.__getTargetComponentFeatures(dataContainer)
+            #
+            for (entityId, compId, filteredFeature) in targetFeatureD:
+                cObj.setValue(ii + 1, "ordinal", ii)
+                cObj.setValue(entryId, "entry_id", ii)
+                cObj.setValue(entityId, "entity_id", ii)
+                cObj.setValue(compId, "comp_id", ii)
+                cObj.setValue(filteredFeature, "type", ii)
+                cObj.setValue("entity_annotation_%d" % jj, "annotation_id", ii)
+                details = "Ligand targeted in this investigation"
+                cObj.setValue(details, "description", ii)
+                cObj.setValue(compId, "name", ii)
+                cObj.setValue("PDB", "provenance_source", ii)
+                cObj.setValue("V1.0", "assignment_version", ii)
+                #
+                jj += 1
+                ii += 1
+            #
+            skipBird = True
+            if not skipBird:
+                # BIRD type and class
+                birdFeatureD = self.__getBirdFeatures(dataContainer)
+                for (entityId, compId, prdId, filteredFeature), fName in birdFeatureD.items():
+                    cObj.setValue(ii + 1, "ordinal", ii)
+                    cObj.setValue(entryId, "entry_id", ii)
+                    cObj.setValue(entityId, "entity_id", ii)
+                    cObj.setValue(compId, "comp_id", ii)
+                    cObj.setValue(filteredFeature, "type", ii)
+                    cObj.setValue("entity_annotation_%d" % jj, "annotation_id", ii)
+                    if compId:
+                        details = "Non-polymer BIRD %s chemical component %s" % (prdId, compId)
+                    else:
+                        details = "Polymer BIRD %s entity %s" % (prdId, entityId)
+                    cObj.setValue(details, "description", ii)
+                    #
+                    cObj.setValue(fName, "name", ii)
+                    cObj.setValue("PDB", "provenance_source", ii)
+                    cObj.setValue("V1.0", "assignment_version", ii)
+                    #
+                    jj += 1
+                    ii += 1
+            return True
+        except Exception as e:
+            logger.exception("%s %s failing with %s", dataContainer.getName(), catName, str(e))
+        return False
diff --git a/rcsb/utils/dictionary/DictMethodEntityInstanceHelper.py b/rcsb/utils/dictionary/DictMethodEntityInstanceHelper.py
new file mode 100644
index 0000000..6b67f3c
--- /dev/null
+++ b/rcsb/utils/dictionary/DictMethodEntityInstanceHelper.py
@@ -0,0 +1,1776 @@
+##
+# File:    DictMethodEntityInstanceHelper.py
+# Author:  J. Westbrook
+# Date:    16-Jul-2019
+# Version: 0.001 Initial version
+#
+##
+"""
+This helper class implements methods supporting entity-instance-level functions in the RCSB dictionary extension.
+
+"""
+__docformat__ = "restructuredtext en"
+__author__ = "John Westbrook"
+__email__ = "jwest@rcsb.rutgers.edu"
+__license__ = "Apache 2.0"
+
+# pylint: disable=too-many-lines
+
+import logging
+import re
+import time
+from collections import OrderedDict
+
+from mmcif.api.DataCategory import DataCategory
+
+logger = logging.getLogger(__name__)
+
+
+class DictMethodEntityInstanceHelper(object):
+    """This helper class implements methods supporting entity-instance-level functions in the RCSB dictionary extension."""
+
+    def __init__(self, **kwargs):
+        """
+        Args:
+            resourceProvider: (obj) instance of DictMethodResourceProvider()
+            raiseExceptions: (bool, optional) flag to raise rather than handle exceptions
+
+        """
+        #
+        self._raiseExceptions = kwargs.get("raiseExceptions", False)
+        self.__wsPattern = re.compile(r"\s+", flags=re.UNICODE | re.MULTILINE)
+        self.__reNonDigit = re.compile(r"[^\d]+")
+        #
+        rP = kwargs.get("resourceProvider")
+        self.__commonU = rP.getResource("DictMethodCommonUtils instance") if rP else None
+        self.__dApi = rP.getResource("Dictionary API instance (pdbx_core)") if rP else None
+        self.__ccP = rP.getResource("ChemCompProvider instance") if rP else None
+        self.__rlsP = rP.getResource("RcsbLigandScoreProvider instance") if rP else None
+        #
+        logger.debug("Dictionary entity-instance level method helper init")
+
+    def buildContainerEntityInstanceIds(self, dataContainer, catName, **kwargs):
+        """
+        Build:
+
+        loop_
+        _rcsb_entity_instance_container_identifiers.entry_id
+        _rcsb_entity_instance_container_identifiers.entity_id
+        _rcsb_entity_instance_container_identifiers.entity_type
+        _rcsb_entity_instance_container_identifiers.asym_id
+        _rcsb_entity_instance_container_identifiers.auth_asym_id
+        _rcsb_entity_instance_container_identifiers.comp_id
+        _rcsb_entity_instance_container_identifiers.auth_seq_id
+        ...
+
+        """
+        logger.debug("Starting catName %s kwargs %r", catName, kwargs)
+        try:
+            if not (dataContainer.exists("entry") and dataContainer.exists("entity")):
+                return False
+            if not dataContainer.exists(catName):
+                dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName)))
+            #
+            cObj = dataContainer.getObj(catName)
+            asymD = self.__commonU.getInstanceIdMap(dataContainer)
+            npAuthAsymD = self.__commonU.getNonPolymerIdMap(dataContainer)
+            brAuthAsymD = self.__commonU.getBranchedIdMap(dataContainer)
+            seqIdMapAsymD = self.__commonU.getAuthToSeqIdMap(dataContainer)
+            #
+            for ii, asymId in enumerate(sorted(asymD)):
+                for k, v in asymD[asymId].items():
+                    cObj.setValue(v, k, ii)
+                v = ",".join(seqIdMapAsymD[asymId]) if asymId in seqIdMapAsymD else "?"
+                cObj.setValue(v, "auth_to_entity_poly_seq_mapping", ii)
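+            # (A sketch of one identifier row from getInstanceIdMap(), values
+            # illustrative: {"entry_id": "4ABC", "entity_id": "1",
+            # "entity_type": "polymer", "asym_id": "A", "auth_asym_id": "A"}.)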
+ cObj.setValue(v, "auth_to_entity_poly_seq_mapping", ii) + + ok = self.__addPdbxValidateAsymIds(dataContainer, asymD, npAuthAsymD, brAuthAsymD) + return ok + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def __addPdbxValidateAsymIds(self, dataContainer, asymMapD, npAuthAsymMapD, brAuthAsymMapD): + """Internal method to insert Asym_id's into the following categories: + + _pdbx_validate_close_contact.rcsb_label_asym_id_1 + _pdbx_validate_close_contact.rcsb_label_asym_id_2 + _pdbx_validate_symm_contact.rcsb_label_asym_id_1 + _pdbx_validate_symm_contact.rcsb_label_asym_id_2 + _pdbx_validate_rmsd_bond.rcsb_label_asym_id_1 + _pdbx_validate_rmsd_bond.rcsb_label_asym_id_2 + _pdbx_validate_rmsd_angle.rcsb_label_asym_id_1 + _pdbx_validate_rmsd_angle.rcsb_label_asym_id_2 + _pdbx_validate_rmsd_angle.rcsb_label_asym_id_3 + _pdbx_validate_torsion.rcsb_label_asym_id + _pdbx_validate_peptide_omega.rcsb_label_asym_id_1 + _pdbx_validate_peptide_omega.rcsb_label_asym_id_2 + _pdbx_validate_chiral.rcsb_label_asym_id + _pdbx_validate_planes.rcsb_label_asym_id + _pdbx_validate_planes_atom.rcsb_label_asym_id + _pdbx_validate_main_chain_plane.rcsb_label_asym_id + _pdbx_validate_polymer_linkage.rcsb_label_asym_id_1 + _pdbx_validate_polymer_linkage.rcsb_label_asym_id_2 + """ + # + mD = { + "pdbx_validate_close_contact": [("auth_asym_id_1", "auth_seq_id_1", "rcsb_label_asym_id_1"), ("auth_asym_id_2", "auth_seq_id_2", "rcsb_label_asym_id_2")], + "pdbx_validate_symm_contact": [("auth_asym_id_1", "auth_seq_id_1", "rcsb_label_asym_id_1"), ("auth_asym_id_2", "auth_seq_id_2", "rcsb_label_asym_id_2")], + "pdbx_validate_rmsd_bond": [("auth_asym_id_1", "auth_seq_id_1", "rcsb_label_asym_id_1"), ("auth_asym_id_2", "auth_seq_id_2", "rcsb_label_asym_id_2")], + "pdbx_validate_rmsd_angle": [ + ("auth_asym_id_1", "auth_seq_id_1", "rcsb_label_asym_id_1"), + ("auth_asym_id_2", "auth_seq_id_2", "rcsb_label_asym_id_2"), + ("auth_asym_id_3", "auth_seq_id_3", "rcsb_label_asym_id_3"), + ], + "pdbx_validate_torsion": [("auth_asym_id", "auth_seq_id", "rcsb_label_asym_id")], + "pdbx_validate_peptide_omega": [("auth_asym_id_1", "auth_seq_id_1", "rcsb_label_asym_id_1"), ("auth_asym_id_2", "auth_seq_id_2", "rcsb_label_asym_id_2")], + "pdbx_validate_chiral": [("auth_asym_id", "auth_seq_id", "rcsb_label_asym_id")], + "pdbx_validate_planes": [("auth_asym_id", "auth_seq_id", "rcsb_label_asym_id")], + "pdbx_validate_planes_atom": [("auth_asym_id", "auth_seq_id", "rcsb_label_asym_id")], + "pdbx_validate_main_chain_plane": [("auth_asym_id", "auth_seq_id", "rcsb_label_asym_id")], + "pdbx_validate_polymer_linkage": [("auth_asym_id_1", "auth_seq_id_1", "rcsb_label_asym_id_1"), ("auth_asym_id_2", "auth_seq_id_2", "rcsb_label_asym_id_2")], + "pdbx_distant_solvent_atoms": [("auth_asym_id", "auth_seq_id", "rcsb_label_asym_id")], + } + # -- JDW + # polymer lookup + authAsymD = {} + for asymId, dD in asymMapD.items(): + if dD["entity_type"].lower() in ["polymer", "branched"]: + authAsymD[(dD["auth_asym_id"], "?")] = asymId + # + # non-polymer lookup + # + logger.debug("%s authAsymD %r", dataContainer.getName(), authAsymD) + for (authAsymId, seqId), dD in npAuthAsymMapD.items(): + if dD["entity_type"].lower() not in ["polymer", "branched"]: + authAsymD[(authAsymId, seqId)] = dD["asym_id"] + # + # branched lookup + logger.debug("%s authAsymD %r", dataContainer.getName(), authAsymD) + for (authAsymId, seqId), dD in brAuthAsymMapD.items(): + if dD["entity_type"].lower() in ["branched"]: + 
+        for (authAsymId, seqId), dD in brAuthAsymMapD.items():
+            if dD["entity_type"].lower() in ["branched"]:
+                authAsymD[(authAsymId, seqId)] = dD["asym_id"]
+        #
+        #
+        for catName, mTupL in mD.items():
+            if not dataContainer.exists(catName):
+                continue
+            cObj = dataContainer.getObj(catName)
+            for ii in range(cObj.getRowCount()):
+                for mTup in mTupL:
+                    try:
+                        authVal = cObj.getValue(mTup[0], ii)
+                    except Exception:
+                        authVal = "?"
+                    try:
+                        authSeqId = cObj.getValue(mTup[1], ii)
+                    except Exception:
+                        authSeqId = "?"
+
+                    # authVal = cObj.getValue(mTup[0], ii)
+                    # authSeqId = cObj.getValue(mTup[1], ii)
+                    #
+                    # logger.debug("%s %4d authAsymId %r authSeqId %r" % (catName, ii, authVal, authSeqId))
+                    #
+                    if (authVal, authSeqId) in authAsymD:
+                        if not cObj.hasAttribute(mTup[2]):
+                            cObj.appendAttribute(mTup[2])
+                        cObj.setValue(authAsymD[(authVal, authSeqId)], mTup[2], ii)
+                    elif (authVal, "?") in authAsymD:
+                        if not cObj.hasAttribute(mTup[2]):
+                            cObj.appendAttribute(mTup[2])
+                        cObj.setValue(authAsymD[(authVal, "?")], mTup[2], ii)
+                    else:
+                        if authVal not in ["."]:
+                            logger.warning("%s %s missing mapping auth asymId %s authSeqId %r", dataContainer.getName(), catName, authVal, authSeqId)
+                        if not cObj.hasAttribute(mTup[2]):
+                            cObj.appendAttribute(mTup[2])
+                        cObj.setValue("?", mTup[2], ii)
+
+        return True
+
+    def __initializeInstanceFeatureType(self, dataContainer, asymId, fCountD, countType="set"):
+        instTypeD = self.__commonU.getInstanceTypes(dataContainer)
+        eTupL = []
+        eType = instTypeD[asymId]
+        if eType == "polymer":
+            eTupL = self.__dApi.getEnumListWithDetail("rcsb_polymer_instance_feature_summary", "type")
+        elif eType in ["non-polymer", "water"]:
+            eTupL = self.__dApi.getEnumListWithDetail("rcsb_nonpolymer_instance_feature_summary", "type")
+        elif eType == "branched":
+            eTupL = self.__dApi.getEnumListWithDetail("rcsb_branched_instance_feature_summary", "type")
+        else:
+            logger.error("%r asymId %r eType %r", dataContainer.getName(), asymId, eType)
+        #
+        fTypeL = sorted([tup[0] for tup in eTupL])
+        #
+        for fType in fTypeL:
+            if countType == "set":
+                fCountD.setdefault(asymId, {}).setdefault(fType, set())
+            else:
+                fCountD.setdefault(asymId, {}).setdefault(fType, [])
+        #
+        return fCountD
+
+    # ---- JDW
+    def buildEntityInstanceFeatureSummaryPrev(self, dataContainer, catName, **kwargs):
+        """Build category rcsb_entity_instance_feature_summary (UPDATED)
+
+        Example:
+
+        loop_
+        _rcsb_entity_instance_feature_summary.ordinal
+        _rcsb_entity_instance_feature_summary.entry_id
+        _rcsb_entity_instance_feature_summary.entity_id
+        _rcsb_entity_instance_feature_summary.asym_id
+        _rcsb_entity_instance_feature_summary.auth_asym_id
+        #
+        _rcsb_entity_instance_feature_summary.type
+        _rcsb_entity_instance_feature_summary.count
+        _rcsb_entity_instance_feature_summary.coverage
+        # ...
+        """
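+        # Coverage sketch for the summary rows built below: coverage = (sum of
+        # feature monomer counts) / (enumerated polymer length); e.g. ranges 1-10
+        # and 21-25 on a 100-residue entity give (10 + 5) / 100 = 0.15.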
+ """ + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + try: + if catName != "rcsb_entity_instance_feature_summary": + return False + if not dataContainer.exists("rcsb_entity_instance_feature") and not dataContainer.exists("entry"): + return False + + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + eObj = dataContainer.getObj("entry") + entryId = eObj.getValue("id", 0) + # + sObj = dataContainer.getObj(catName) + fObj = dataContainer.getObj("rcsb_entity_instance_feature") + # + instEntityD = self.__commonU.getInstanceEntityMap(dataContainer) + entityPolymerLengthD = self.__commonU.getPolymerEntityLengthsEnumerated(dataContainer) + # typeList = self.__dApi.getEnumList("rcsb_entity_instance_feature_summary", "type", sortFlag=True) + asymAuthD = self.__commonU.getAsymAuthIdMap(dataContainer) + instIdMapD = self.__commonU.getInstanceIdMap(dataContainer) + + fCountD = OrderedDict() + fMonomerCountD = OrderedDict() + for ii in range(fObj.getRowCount()): + asymId = fObj.getValue("asym_id", ii) + # ---- initialize counts + # fCountD = self.__initializeInstanceFeatureType(dataContainer, asymId, fCountD, countType="set") + # fMonomerCountD = self.__initializeInstanceFeatureType(dataContainer, asymId, fMonomerCountD, countType="list") + # ---- + fType = fObj.getValue("type", ii) + fId = fObj.getValue("feature_id", ii) + fCountD.setdefault(asymId, {}).setdefault(fType, set()).add(fId) + # + tbegS = fObj.getValueOrDefault("feature_positions_beg_seq_id", ii, defaultValue=None) + tendS = fObj.getValueOrDefault("feature_positions_end_seq_id", ii, defaultValue=None) + if fObj.hasAttribute("feature_positions_beg_seq_id") and tbegS is not None and fObj.hasAttribute("feature_positions_end_seq_id") and tendS is not None: + begSeqIdL = str(fObj.getValue("feature_positions_beg_seq_id", ii)).split(";") + endSeqIdL = str(fObj.getValue("feature_positions_end_seq_id", ii)).split(";") + monCount = 0 + for begSeqId, endSeqId in zip(begSeqIdL, endSeqIdL): + try: + monCount += abs(int(endSeqId) - int(begSeqId) + 1) + except Exception: + logger.warning( + "%s fType %r fId %r bad sequence begSeqIdL %r endSeqIdL %r tbegS %r tendS %r", + dataContainer.getName(), + fType, + fId, + begSeqIdL, + endSeqIdL, + tbegS, + tendS, + ) + + fMonomerCountD.setdefault(asymId, {}).setdefault(fType, []).append(monCount) + elif fObj.hasAttribute("feature_positions_beg_seq_id") and tbegS: + seqIdL = str(fObj.getValue("feature_positions_beg_seq_id", ii)).split(";") + fMonomerCountD.setdefault(asymId, {}).setdefault(fType, []).append(len(seqIdL)) + # + logger.debug("%s fCountD %r", entryId, fCountD) + # + + ii = 0 + for asymId, fTypeD in fCountD.items(): + entityId = instEntityD[asymId] + authAsymId = asymAuthD[asymId] + for fType, fS in fTypeD.items(): + sObj.setValue(ii + 1, "ordinal", ii) + sObj.setValue(entryId, "entry_id", ii) + sObj.setValue(entityId, "entity_id", ii) + sObj.setValue(asymId, "asym_id", ii) + sObj.setValue(authAsymId, "auth_asym_id", ii) + # add comp + if asymId in instIdMapD and "comp_id" in instIdMapD[asymId] and instIdMapD[asymId]["comp_id"]: + sObj.setValue(instIdMapD[asymId]["comp_id"], "comp_id", ii) + sObj.setValue(fType, "type", ii) + # + if fType.startswith("UNOBSERVED") and asymId in fMonomerCountD and fType in fMonomerCountD[asymId]: + fCount = sum(fMonomerCountD[asymId][fType]) + else: + fCount = len(fS) + sObj.setValue(fCount, "count", ii) + fracC = 0.0 + if asymId in 
+                    if asymId in fMonomerCountD and fType in fMonomerCountD[asymId] and entityId in entityPolymerLengthD:
+                        fracC = float(sum(fMonomerCountD[asymId][fType])) / float(entityPolymerLengthD[entityId])
+                    sObj.setValue(round(fracC, 5), "coverage", ii)
+                    if (
+                        fType in ["CATH", "SCOP", "HELIX_P", "SHEET", "UNASSIGNED_SEC_STRUCT", "UNOBSERVED_RESIDUE_XYZ", "ZERO_OCCUPANCY_RESIDUE_XYZ"]
+                        and asymId in fMonomerCountD
+                        and fType in fMonomerCountD[asymId]
+                    ):
+                        minL = min(fMonomerCountD[asymId][fType]) if fMonomerCountD[asymId][fType] else 0
+                        maxL = max(fMonomerCountD[asymId][fType]) if fMonomerCountD[asymId][fType] else 0
+                        sObj.setValue(minL, "minimum_length", ii)
+                        sObj.setValue(maxL, "maximum_length", ii)
+                    ii += 1
+        except Exception as e:
+            logger.exception("Failing for %s with %s", dataContainer.getName(), str(e))
+        return True
+
+    # ---- JDW
+
+    def buildEntityInstanceFeatures(self, dataContainer, catName, **kwargs):
+        """Build category rcsb_entity_instance_feature ...
+
+        Example:
+            loop_
+            _rcsb_entity_instance_feature.ordinal
+            _rcsb_entity_instance_feature.entry_id
+            _rcsb_entity_instance_feature.entity_id
+            _rcsb_entity_instance_feature.asym_id
+            _rcsb_entity_instance_feature.auth_asym_id
+            _rcsb_entity_instance_feature.feature_id
+            _rcsb_entity_instance_feature.type
+            _rcsb_entity_instance_feature.name
+            _rcsb_entity_instance_feature.description
+            _rcsb_entity_instance_feature.reference_scheme
+            _rcsb_entity_instance_feature.provenance_source
+            _rcsb_entity_instance_feature.assignment_version
+            _rcsb_entity_instance_feature.feature_positions_beg_seq_id
+            _rcsb_entity_instance_feature.feature_positions_end_seq_id
+            _rcsb_entity_instance_feature.feature_positions_value
+
+        """
+        doLineage = False
+        logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs)
+        try:
+            if catName != "rcsb_entity_instance_feature":
+                return False
+            # Exit if source categories are missing
+            if not dataContainer.exists("entry"):
+                return False
+            #
+            # Create the new target category
+            if not dataContainer.exists(catName):
+                dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName)))
+            cObj = dataContainer.getObj(catName)
+            #
+            rP = kwargs.get("resourceProvider")
+
+            eObj = dataContainer.getObj("entry")
+            entryId = eObj.getValue("id", 0)
+            #
+            asymIdD = self.__commonU.getInstanceEntityMap(dataContainer)
+            asymAuthIdD = self.__commonU.getAsymAuthIdMap(dataContainer)
+            asymIdRangesD = self.__commonU.getInstancePolymerRanges(dataContainer)
+            pAuthAsymD = self.__commonU.getPolymerIdMap(dataContainer)
+            instTypeD = self.__commonU.getInstanceTypes(dataContainer)
+            # ---------------
+            # Add CATH assignments
+            cathU = rP.getResource("CathProvider instance") if rP else None
+            ii = cObj.getRowCount()
+            #
+            for asymId, authAsymId in asymAuthIdD.items():
+                if instTypeD[asymId] not in ["polymer", "branched"]:
+                    continue
+                entityId = asymIdD[asymId]
+                dL = cathU.getCathResidueRanges(entryId.lower(), authAsymId)
+                logger.debug("%s asymId %s authAsymId %s dL %r", entryId, asymId, authAsymId, dL)
+                vL = cathU.getCathVersions(entryId.lower(), authAsymId)
+                for (cathId, domId, tId, authSeqBeg, authSeqEnd) in dL:
+                    begSeqId = pAuthAsymD[(authAsymId, authSeqBeg, None)]["seq_id"] if (authAsymId, authSeqBeg, None) in pAuthAsymD else None
+                    endSeqId = pAuthAsymD[(authAsymId, authSeqEnd, None)]["seq_id"] if (authAsymId, authSeqEnd, None) in pAuthAsymD else None
+                    if not (begSeqId and endSeqId):
+                        # take the full chain
+                        begSeqId = asymIdRangesD[asymId]["begSeqId"] if asymId in asymIdRangesD else None
+                        endSeqId = asymIdRangesD[asymId]["endSeqId"] if asymId in asymIdRangesD else None
+                        if not (begSeqId and endSeqId):
+                            logger.info(
+                                "%s CATH cathId %r domId %r tId %r asymId %r authAsymId %r authSeqBeg %r authSeqEnd %r",
+                                entryId,
+                                cathId,
+                                domId,
+                                tId,
+                                asymId,
+                                authAsymId,
+                                authSeqBeg,
+                                authSeqEnd,
+                            )
+                            continue
+
+                    cObj.setValue(ii + 1, "ordinal", ii)
+                    cObj.setValue(entryId, "entry_id", ii)
+                    cObj.setValue(entityId, "entity_id", ii)
+                    cObj.setValue(asymId, "asym_id", ii)
+                    cObj.setValue(authAsymId, "auth_asym_id", ii)
+                    cObj.setValue("CATH", "type", ii)
+                    #
+                    cObj.setValue(str(cathId), "feature_id", ii)
+                    # cObj.setValue(str(domId), "feature_id", ii)
+                    # cObj.setValue(cathId, "name", ii)
+                    cObj.setValue(cathU.getCathName(cathId), "name", ii)
+                    #
+                    if doLineage:
+                        cObj.setValue(";".join(cathU.getNameLineage(cathId)), "annotation_lineage_name", ii)
+                        idLinL = cathU.getIdLineage(cathId)
+                        cObj.setValue(";".join(idLinL), "annotation_lineage_id", ii)
+                        cObj.setValue(";".join([str(jj) for jj in range(1, len(idLinL) + 1)]), "annotation_lineage_depth", ii)
+                    #
+                    #
+                    cObj.setValue(begSeqId, "feature_positions_beg_seq_id", ii)
+                    cObj.setValue(endSeqId, "feature_positions_end_seq_id", ii)
+                    #
+                    cObj.setValue("PDB entity", "reference_scheme", ii)
+                    cObj.setValue("CATH", "provenance_source", ii)
+                    cObj.setValue(vL[0], "assignment_version", ii)
+                    #
+                    ii += 1
+            # ------------
+            # Add SCOP assignments
+            oldCode = False
+            scopU = rP.getResource("ScopProvider instance") if rP else None
+            for asymId, authAsymId in asymAuthIdD.items():
+                if instTypeD[asymId] not in ["polymer", "branched"]:
+                    continue
+                entityId = asymIdD[asymId]
+                dL = scopU.getScopResidueRanges(entryId.lower(), authAsymId)
+                version = scopU.getScopVersion()
+                for (sunId, domId, sccs, tId, authSeqBeg, authSeqEnd) in dL:
+                    begSeqId = pAuthAsymD[(authAsymId, authSeqBeg, None)]["seq_id"] if (authAsymId, authSeqBeg, None) in pAuthAsymD else None
+                    endSeqId = pAuthAsymD[(authAsymId, authSeqEnd, None)]["seq_id"] if (authAsymId, authSeqEnd, None) in pAuthAsymD else None
+                    # logger.info("%s (first) begSeqId %r endSeqId %r", entryId, begSeqId, endSeqId)
+                    if not (begSeqId and endSeqId):
+                        # try another full range
+                        # begSeqId = asymIdRangesD[asymId]["begAuthSeqId"] if asymId in asymIdRangesD and "begAuthSeqId" in asymIdRangesD[asymId] else None
+                        # endSeqId = asymIdRangesD[asymId]["endAuthSeqId"] if asymId in asymIdRangesD and "endAuthSeqId" in asymIdRangesD[asymId] else None
+                        begSeqId = asymIdRangesD[asymId]["begSeqId"] if asymId in asymIdRangesD else None
+                        endSeqId = asymIdRangesD[asymId]["endSeqId"] if asymId in asymIdRangesD else None
+                        # logger.info("%s (altd) begSeqId %r endSeqId %r", entryId, begSeqId, endSeqId)
+                        if not (begSeqId and endSeqId):
+                            logger.debug(
+                                "%s unqualified SCOP sunId %r domId %r sccs %r asymId %r authAsymId %r authSeqBeg %r authSeqEnd %r",
+                                entryId,
+                                sunId,
+                                domId,
+                                sccs,
+                                asymId,
+                                authAsymId,
+                                authSeqBeg,
+                                authSeqEnd,
+                            )
+                            continue
+
+                    cObj.setValue(ii + 1, "ordinal", ii)
+                    cObj.setValue(entryId, "entry_id", ii)
+                    cObj.setValue(entityId, "entity_id", ii)
+                    cObj.setValue(asymId, "asym_id", ii)
+                    cObj.setValue(authAsymId, "auth_asym_id", ii)
+                    cObj.setValue("SCOP", "type", ii)
+                    #
+                    # cObj.setValue(str(sunId), "domain_id", ii)
+                    cObj.setValue(domId, "feature_id", ii)
+                    cObj.setValue(scopU.getScopName(sunId), "name", ii)
+                    #
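+                    # (Lineage fields are emitted as parallel ";"-joined lists:
+                    # annotation_lineage_id, _name and _depth carry one token per
+                    # level, with depth simply 1..len(lineage).)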
+                    if doLineage:
+                        tL = [t if t is not None else "" for t in scopU.getNameLineage(sunId)]
+                        cObj.setValue(";".join(tL), "annotation_lineage_name", ii)
+                        idLinL = scopU.getIdLineage(sunId)
+                        cObj.setValue(";".join([str(t) for t in idLinL]), "annotation_lineage_id", ii)
+                        cObj.setValue(";".join([str(jj) for jj in range(1, len(idLinL) + 1)]), "annotation_lineage_depth", ii)
+                    #
+                    cObj.setValue(begSeqId, "feature_positions_beg_seq_id", ii)
+                    cObj.setValue(endSeqId, "feature_positions_end_seq_id", ii)
+                    if oldCode:
+                        if begSeqId is not None and endSeqId is not None:
+                            if begSeqId == 0:
+                                begSeqId += 1
+                                endSeqId += 1
+                            cObj.setValue(begSeqId, "feature_positions_beg_seq_id", ii)
+                            cObj.setValue(endSeqId, "feature_positions_end_seq_id", ii)
+                        else:
+                            tSeqBeg = asymIdRangesD[asymId]["begAuthSeqId"] if asymId in asymIdRangesD and "begAuthSeqId" in asymIdRangesD[asymId] else None
+                            cObj.setValue(tSeqBeg, "feature_positions_beg_seq_id", ii)
+                            tSeqEnd = asymIdRangesD[asymId]["endAuthSeqId"] if asymId in asymIdRangesD and "endAuthSeqId" in asymIdRangesD[asymId] else None
+                            cObj.setValue(tSeqEnd, "feature_positions_end_seq_id", ii)
+                    #
+                    cObj.setValue("PDB entity", "reference_scheme", ii)
+                    cObj.setValue("SCOPe", "provenance_source", ii)
+                    cObj.setValue(version, "assignment_version", ii)
+                    #
+                    ii += 1
+            # ------------
+            # Add sheet features
+            instSheetRangeD = self.__commonU.getProtSheetFeatures(dataContainer)
+            sheetSenseD = self.__commonU.getProtSheetSense(dataContainer)
+            for sId, sD in instSheetRangeD.items():
+                for asymId, rTupL in sD.items():
+                    entityId = asymIdD[asymId]
+                    authAsymId = asymAuthIdD[asymId]
+                    cObj.setValue(ii + 1, "ordinal", ii)
+                    cObj.setValue(entryId, "entry_id", ii)
+                    cObj.setValue(entityId, "entity_id", ii)
+                    cObj.setValue(asymId, "asym_id", ii)
+                    cObj.setValue(authAsymId, "auth_asym_id", ii)
+                    cObj.setValue("SHEET", "type", ii)
+                    #
+                    cObj.setValue(str(sId), "feature_id", ii)
+                    cObj.setValue("sheet", "name", ii)
+                    if sId in sheetSenseD:
+                        cObj.setValue(sheetSenseD[sId] + " sense sheet", "description", ii)
+                    #
+                    tSeqId = ";".join([str(rTup[0]) for rTup in rTupL])
+                    cObj.setValue(tSeqId, "feature_positions_beg_seq_id", ii)
+                    tSeqId = ";".join([str(rTup[1]) for rTup in rTupL])
+                    cObj.setValue(tSeqId, "feature_positions_end_seq_id", ii)
+                    #
+                    cObj.setValue("PDB entity", "reference_scheme", ii)
+                    cObj.setValue("PROMOTIF", "provenance_source", ii)
+                    cObj.setValue("V1.0", "assignment_version", ii)
+                    #
+                    ii += 1
+            # ------------------
+            # Helix features
+            helixRangeD = self.__commonU.getProtHelixFeatures(dataContainer)
+            for hId, hL in helixRangeD.items():
+                for (asymId, begSeqId, endSeqId) in hL:
+                    entityId = asymIdD[asymId]
+                    authAsymId = asymAuthIdD[asymId]
+                    cObj.setValue(ii + 1, "ordinal", ii)
+                    cObj.setValue(entryId, "entry_id", ii)
+                    cObj.setValue(entityId, "entity_id", ii)
+                    cObj.setValue(asymId, "asym_id", ii)
+                    cObj.setValue(authAsymId, "auth_asym_id", ii)
+                    cObj.setValue("HELIX_P", "type", ii)
+                    #
+                    cObj.setValue(str(hId), "feature_id", ii)
+                    cObj.setValue("helix", "name", ii)
+                    #
+                    cObj.setValue(begSeqId, "feature_positions_beg_seq_id", ii)
+                    cObj.setValue(endSeqId, "feature_positions_end_seq_id", ii)
+                    #
+                    cObj.setValue("PDB entity", "reference_scheme", ii)
+                    cObj.setValue("PROMOTIF", "provenance_source", ii)
+                    cObj.setValue("V1.0", "assignment_version", ii)
+                    #
+                    ii += 1
+            #
+            # ------------------
+            # Unassigned SS features
+            unassignedRangeD = self.__commonU.getProtUnassignedSecStructFeatures(dataContainer)
+            for asymId, rTupL in unassignedRangeD.items():
+                if not rTupL:
+                    continue
+                entityId = asymIdD[asymId]
+                authAsymId = asymAuthIdD[asymId]
+                cObj.setValue(ii + 1, "ordinal", ii)
+                cObj.setValue(entryId, "entry_id", ii)
+                cObj.setValue(entityId, "entity_id", ii)
+                cObj.setValue(asymId, "asym_id", ii)
+                cObj.setValue(authAsymId, "auth_asym_id", ii)
+                cObj.setValue("UNASSIGNED_SEC_STRUCT", "type", ii)
+                #
+                cObj.setValue(str(1), "feature_id", ii)
+                cObj.setValue("unassigned secondary structure", "name", ii)
+                #
+                cObj.setValue(";".join([str(rTup[0]) for rTup in rTupL]), "feature_positions_beg_seq_id", ii)
+                cObj.setValue(";".join([str(rTup[1]) for rTup in rTupL]), "feature_positions_end_seq_id", ii)
+                #
+                cObj.setValue("PDB entity", "reference_scheme", ii)
+                cObj.setValue("PROMOTIF", "provenance_source", ii)
+                cObj.setValue("V1.0", "assignment_version", ii)
+                #
+                ii += 1
+            #
+            cisPeptideD = self.__commonU.getCisPeptides(dataContainer)
+            for cId, cL in cisPeptideD.items():
+                for (asymId, begSeqId, endSeqId, modelId, omegaAngle) in cL:
+                    entityId = asymIdD[asymId]
+                    authAsymId = asymAuthIdD[asymId]
+                    cObj.setValue(ii + 1, "ordinal", ii)
+                    cObj.setValue(entryId, "entry_id", ii)
+                    cObj.setValue(entityId, "entity_id", ii)
+                    cObj.setValue(asymId, "asym_id", ii)
+                    cObj.setValue(authAsymId, "auth_asym_id", ii)
+                    cObj.setValue("CIS-PEPTIDE", "type", ii)
+                    cObj.setValue(str(cId), "feature_id", ii)
+                    cObj.setValue("cis-peptide", "name", ii)
+                    #
+                    cObj.setValue(begSeqId, "feature_positions_beg_seq_id", ii)
+                    cObj.setValue(endSeqId, "feature_positions_end_seq_id", ii)
+                    #
+                    cObj.setValue("PDB entity", "reference_scheme", ii)
+                    cObj.setValue("PDB", "provenance_source", ii)
+                    cObj.setValue("V1.0", "assignment_version", ii)
+                    tS = "cis-peptide bond in model %d with omega angle %.2f" % (modelId, omegaAngle)
+                    cObj.setValue(tS, "description", ii)
+                    #
+                    ii += 1
+            #
+            targetSiteD = self.__commonU.getTargetSiteInfo(dataContainer)
+            ligandSiteD = self.__commonU.getLigandSiteInfo(dataContainer)
+            for tId, tL in targetSiteD.items():
+                aD = OrderedDict()
+                for tD in tL:
+                    aD.setdefault(tD["asymId"], []).append((tD["compId"], tD["seqId"]))
+                for asymId, aL in aD.items():
+                    entityId = asymIdD[asymId]
+                    authAsymId = asymAuthIdD[asymId]
+                    cObj.setValue(ii + 1, "ordinal", ii)
+                    cObj.setValue(entryId, "entry_id", ii)
+                    cObj.setValue(entityId, "entity_id", ii)
+                    cObj.setValue(asymId, "asym_id", ii)
+                    cObj.setValue(authAsymId, "auth_asym_id", ii)
+                    cObj.setValue("BINDING_SITE", "type", ii)
+                    cObj.setValue(str(tId), "feature_id", ii)
+                    cObj.setValue("binding_site", "name", ii)
+                    #
+                    cObj.setValue(";".join([tup[0] for tup in aL]), "feature_positions_beg_comp_id", ii)
+                    cObj.setValue(";".join([tup[1] for tup in aL]), "feature_positions_beg_seq_id", ii)
+                    #
+                    cObj.setValue("PDB entity", "reference_scheme", ii)
+                    cObj.setValue("PDB", "provenance_source", ii)
+                    cObj.setValue("V1.0", "assignment_version", ii)
+                    if tId in ligandSiteD:
+                        cObj.setValue(ligandSiteD[tId]["description"], "description", ii)
+                        if ligandSiteD[tId]["siteLabel"]:
+                            cObj.setValue(ligandSiteD[tId]["siteLabel"], "name", ii)
+                    #
+                    ii += 1
+            #
+            unObsPolyResRngD = self.__commonU.getUnobservedPolymerResidueInfo(dataContainer)
+            for (modelId, asymId, zeroOccFlag), rTupL in unObsPolyResRngD.items():
+                entityId = asymIdD[asymId]
+                authAsymId = asymAuthIdD[asymId]
+                cObj.setValue(ii + 1, "ordinal", ii)
+                cObj.setValue(entryId, "entry_id", ii)
+                cObj.setValue(entityId, "entity_id", ii)
+                cObj.setValue(asymId, "asym_id", ii)
+                cObj.setValue(authAsymId, "auth_asym_id", ii)
+                #
+                if zeroOccFlag:
+                    cObj.setValue("ZERO_OCCUPANCY_RESIDUE_XYZ", "type", ii)
+                    tS = "All atom coordinates for this residue are reported with zero-occupancy in model %s" % modelId
"description", ii) + cObj.setValue("residue coordinates with zero occupancy", "name", ii) + else: + cObj.setValue("UNOBSERVED_RESIDUE_XYZ", "type", ii) + tS = "No coordinates for this residue are reported in model %s" % modelId + cObj.setValue(tS, "description", ii) + cObj.setValue("unmodeled residue", "name", ii) + # + cObj.setValue(str(1), "feature_id", ii) + # + cObj.setValue(";".join([str(rTup[0]) for rTup in rTupL]), "feature_positions_beg_seq_id", ii) + cObj.setValue(";".join([str(rTup[1]) for rTup in rTupL]), "feature_positions_end_seq_id", ii) + # + cObj.setValue("PDB entity", "reference_scheme", ii) + cObj.setValue("PDB", "provenance_source", ii) + cObj.setValue("V1.0", "assignment_version", ii) + # + ii += 1 + + unObsPolyAtomRngD = self.__commonU.getUnobservedPolymerAtomInfo(dataContainer) + for (modelId, asymId, zeroOccFlag), rTupL in unObsPolyAtomRngD.items(): + entityId = asymIdD[asymId] + authAsymId = asymAuthIdD[asymId] + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(asymId, "asym_id", ii) + cObj.setValue(authAsymId, "auth_asym_id", ii) + # + if zeroOccFlag: + cObj.setValue("ZERO_OCCUPANCY_ATOM_XYZ", "type", ii) + tS = "Some atom coordinates in this residue are reported with zero-occupancy in model %s" % modelId + cObj.setValue(tS, "description", ii) + cObj.setValue("atom coordinates with zero occupancy", "name", ii) + else: + cObj.setValue("UNOBSERVED_ATOM_XYZ", "type", ii) + tS = "Some atom coordinates in this residue are not reported in model %s" % modelId + cObj.setValue(tS, "description", ii) + cObj.setValue("partially modeled residue", "name", ii) + # + cObj.setValue(str(1), "feature_id", ii) + # + cObj.setValue(";".join([str(rTup[0]) for rTup in rTupL]), "feature_positions_beg_seq_id", ii) + cObj.setValue(";".join([str(rTup[1]) for rTup in rTupL]), "feature_positions_end_seq_id", ii) + # + cObj.setValue("PDB entity", "reference_scheme", ii) + cObj.setValue("PDB", "provenance_source", ii) + cObj.setValue("V1.0", "assignment_version", ii) + # + ii += 1 + + npbD = self.__commonU.getBoundNonpolymersByInstance(dataContainer) + jj = 1 + for asymId, rTupL in npbD.items(): + for rTup in rTupL: + if rTup.connectType in ["covalent bond"]: + fType = "HAS_COVALENT_LINKAGE" + fId = "COVALENT_LINKAGE_%d" % jj + + elif rTup.connectType in ["metal coordination"]: + fType = "HAS_METAL_COORDINATION_LINKAGE" + fId = "METAL_COORDINATION_LINKAGE_%d" % jj + else: + continue + + entityId = asymIdD[asymId] + authAsymId = asymAuthIdD[asymId] + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(asymId, "asym_id", ii) + cObj.setValue(authAsymId, "auth_asym_id", ii) + cObj.setValue(rTup.targetCompId, "comp_id", ii) + cObj.setValue(fId, "feature_id", ii) + cObj.setValue(fType, "type", ii) + # + # ("targetCompId", "connectType", "partnerCompId", "partnerAsymId", "partnerEntityType", "bondDistance", "bondOrder") + cObj.setValue( + ";".join( + ["%s has %s with %s instance %s in model 1" % (rTup.targetCompId, rTup.connectType, rTup.partnerEntityType, rTup.partnerAsymId) for rTup in rTupL] + ), + "feature_value_details", + ii, + ) + cObj.setValue(";".join([rTup.partnerCompId if rTup.partnerCompId else "?" for rTup in rTupL]), "feature_value_comp_id", ii) + cObj.setValue(";".join([rTup.bondDistance if rTup.bondDistance else "?" for rTup in rTupL]), "feature_value_reported", ii) + cObj.setValue(";".join(["?" 
+                    cObj.setValue(";".join(["?" for rTup in rTupL]), "feature_value_reference", ii)
+                    cObj.setValue(";".join(["?" for rTup in rTupL]), "feature_value_uncertainty_estimate", ii)
+                    cObj.setValue(";".join(["?" for rTup in rTupL]), "feature_value_uncertainty_estimate_type", ii)
+
+                    cObj.setValue("PDB", "provenance_source", ii)
+                    cObj.setValue("V1.0", "assignment_version", ii)
+                    #
+                    ii += 1
+                    jj += 1
+
+            return True
+        except Exception as e:
+            logger.exception("%s %s failing with %s", dataContainer.getName(), catName, str(e))
+        return False
+
+    def addProtSecStructInfo(self, dataContainer, catName, **kwargs):
+        """
+        Add category rcsb_prot_sec_struct_info.
+
+        """
+        try:
+            logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs)
+            # Exit if source categories are missing
+            if not dataContainer.exists("entry") and not (dataContainer.exists("struct_conf") or dataContainer.exists("struct_sheet_range")):
+                return False
+            #
+            # Create the new target category rcsb_prot_sec_struct_info
+            if not dataContainer.exists(catName):
+                dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName)))
+            sD = self.__commonU.getProtSecStructFeatures(dataContainer)
+            # catName = rcsb_prot_sec_struct_info
+            cObj = dataContainer.getObj(catName)
+            #
+            xObj = dataContainer.getObj("entry")
+            entryId = xObj.getValue("id", 0)
+            #
+            for ii, asymId in enumerate(sD["helixCountD"]):
+                cObj.setValue(entryId, "entry_id", ii)
+                cObj.setValue(asymId, "label_asym_id", ii)
+                #
+                cObj.setValue(sD["helixCountD"][asymId], "helix_count", ii)
+                cObj.setValue(sD["sheetStrandCountD"][asymId], "beta_strand_count", ii)
+                cObj.setValue(sD["unassignedCountD"][asymId], "unassigned_count", ii)
+                #
+                cObj.setValue(",".join([str(t) for t in sD["helixLengthD"][asymId]]), "helix_length", ii)
+                cObj.setValue(",".join([str(t) for t in sD["sheetStrandLengthD"][asymId]]), "beta_strand_length", ii)
+                cObj.setValue(",".join([str(t) for t in sD["unassignedLengthD"][asymId]]), "unassigned_length", ii)
+
+                cObj.setValue("%.2f" % (100.0 * sD["helixFracD"][asymId]), "helix_coverage_percent", ii)
+                cObj.setValue("%.2f" % (100.0 * sD["sheetStrandFracD"][asymId]), "beta_strand_coverage_percent", ii)
+                cObj.setValue("%.2f" % (100.0 * sD["unassignedFracD"][asymId]), "unassigned_coverage_percent", ii)
+
+                cObj.setValue(",".join(sD["sheetSenseD"][asymId]), "beta_sheet_sense", ii)
+                cObj.setValue(",".join([str(t) for t in sD["sheetFullStrandCountD"][asymId]]), "beta_sheet_strand_count", ii)
+
+                cObj.setValue(sD["featureMonomerSequenceD"][asymId], "feature_monomer_sequence", ii)
+                cObj.setValue(sD["featureSequenceD"][asymId], "feature_sequence", ii)
+
+            return True
+        except Exception as e:
+            logger.exception("For %s %r failing with %s", dataContainer.getName(), catName, str(e))
+        return False
+
+    def addConnectionDetails(self, dataContainer, catName, **kwargs):
+        """Build rcsb_struct_conn category -
+
+        Args:
+            dataContainer (object): mmcif.api.DataContainer object instance
+            catName (str): category name
+
+        Returns:
+            bool: True for success or False otherwise
+
+        Example:
+            loop_
+            _rcsb_struct_conn.ordinal_id
+            _rcsb_struct_conn.id
+            _rcsb_struct_conn.conn_type
+            _rcsb_struct_conn.connect_target_label_comp_id
+            _rcsb_struct_conn.connect_target_label_asym_id
+            _rcsb_struct_conn.connect_target_label_seq_id
+            _rcsb_struct_conn.connect_target_label_atom_id
+            _rcsb_struct_conn.connect_target_label_alt_id
+            _rcsb_struct_conn.connect_target_auth_asym_id
+            _rcsb_struct_conn.connect_target_auth_seq_id
+            _rcsb_struct_conn.connect_target_symmetry
+            _rcsb_struct_conn.connect_partner_label_comp_id
+            _rcsb_struct_conn.connect_partner_label_asym_id
+            _rcsb_struct_conn.connect_partner_label_seq_id
+            _rcsb_struct_conn.connect_partner_label_atom_id
+            _rcsb_struct_conn.connect_partner_label_alt_id
+            _rcsb_struct_conn.connect_partner_symmetry
+            _rcsb_struct_conn.details
+
+            # - - - - data truncated for brevity - - - -
+        """
+        try:
+            logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs)
+            # Exit if source categories are missing
+            if not dataContainer.exists("entry") and not dataContainer.exists("struct_conn"):
+                return False
+            #
+            # Create the new target category rcsb_struct_conn
+            if not dataContainer.exists(catName):
+                dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName)))
+            cDL = self.__commonU.getInstanceConnections(dataContainer)
+            asymIdD = self.__commonU.getInstanceEntityMap(dataContainer)
+            asymAuthIdD = self.__commonU.getAsymAuthIdMap(dataContainer)
+            #
+            # catName = rcsb_struct_conn
+            cObj = dataContainer.getObj(catName)
+            #
+            xObj = dataContainer.getObj("entry")
+            entryId = xObj.getValue("id", 0)
+            #
+            for ii, cD in enumerate(cDL):
+                asymId = cD["connect_target_label_asym_id"]
+                entityId = asymIdD[asymId]
+                authAsymId = asymAuthIdD[asymId] if asymId in asymAuthIdD else None
+                cObj.setValue(ii + 1, "ordinal_id", ii)
+                cObj.setValue(entryId, "entry_id", ii)
+                cObj.setValue(asymId, "asym_id", ii)
+                cObj.setValue(entityId, "entity_id", ii)
+                if authAsymId:
+                    cObj.setValue(authAsymId, "auth_asym_id", ii)
+                else:
+                    logger.error("Missing mapping for %s asymId %s to authAsymId", entryId, asymId)
+                for ky, val in cD.items():
+                    cObj.setValue(val, ky, ii)
+            #
+            return True
+        except Exception as e:
+            logger.exception("For %s %r failing with %s", dataContainer.getName(), catName, str(e))
+        return False
+
+    def __stripWhiteSpace(self, val):
+        """Remove all white space from the input value."""
+        if val is None:
+            return val
+        return self.__wsPattern.sub("", val)
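+
+    # A quick sketch of the helper above (illustrative): __stripWhiteSpace(" ZN  2 ")
+    # returns "ZN2", and None passes through unchanged.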
+
+    def buildInstanceValidationFeatures(self, dataContainer, catName, **kwargs):
+        """Build category rcsb_entity_instance_validation_feature ...
+
+        Example:
+            loop_
+            _rcsb_entity_instance_validation_feature.ordinal
+            _rcsb_entity_instance_validation_feature.entry_id
+            _rcsb_entity_instance_validation_feature.entity_id
+            _rcsb_entity_instance_validation_feature.asym_id
+            _rcsb_entity_instance_validation_feature.auth_asym_id
+            _rcsb_entity_instance_validation_feature.feature_id
+            _rcsb_entity_instance_validation_feature.type
+            _rcsb_entity_instance_validation_feature.name
+            _rcsb_entity_instance_validation_feature.description
+            _rcsb_entity_instance_validation_feature.annotation_lineage_id
+            _rcsb_entity_instance_validation_feature.annotation_lineage_name
+            _rcsb_entity_instance_validation_feature.annotation_lineage_depth
+            _rcsb_entity_instance_validation_feature.reference_scheme
+            _rcsb_entity_instance_validation_feature.provenance_source
+            _rcsb_entity_instance_validation_feature.assignment_version
+            _rcsb_entity_instance_validation_feature.feature_positions_beg_seq_id
+            _rcsb_entity_instance_validation_feature.feature_positions_end_seq_id
+            _rcsb_entity_instance_validation_feature.feature_positions_beg_comp_id
+            #
+            _rcsb_entity_instance_validation_feature.feature_value_comp_id
+            _rcsb_entity_instance_validation_feature.feature_value_reported
+            _rcsb_entity_instance_validation_feature.feature_value_reference
+            _rcsb_entity_instance_validation_feature.feature_value_uncertainty_estimate
+            _rcsb_entity_instance_validation_feature.feature_value_uncertainty_estimate_type
+            _rcsb_entity_instance_validation_feature.feature_value_details
+
+        """
+        logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs)
+        typeMapD = {
+            "ROTAMER_OUTLIER": "Molprobity rotamer outlier",
+            "RAMACHANDRAN_OUTLIER": "Molprobity Ramachandran outlier",
+            "RSRZ_OUTLIER": "Real space R-value Z score > 2",
+            "RSCC_OUTLIER": "Real space density correlation value < 0.65",
+            "MOGUL_BOND_OUTLIER": "Mogul bond distance outlier",
+            "MOGUL_ANGLE_OUTLIER": "Mogul bond angle outlier",
+            "BOND_OUTLIER": "Molprobity bond distance outlier",
+            "ANGLE_OUTLIER": "Molprobity bond angle outlier",
+        }
+        try:
+            if catName != "rcsb_entity_instance_validation_feature":
+                return False
+            # Exit if source categories are missing
+            if not dataContainer.exists("entry"):
+                return False
+            #
+            eObj = dataContainer.getObj("entry")
+            entryId = eObj.getValue("id", 0)
+            #
+            # Create the new target category
+            if not dataContainer.exists(catName):
+                dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName)))
+            cObj = dataContainer.getObj(catName)
+            ii = cObj.getRowCount()
+            #
+            asymIdD = self.__commonU.getInstanceEntityMap(dataContainer)
+            asymAuthIdD = self.__commonU.getAsymAuthIdMap(dataContainer)
+            #
+            instanceModelOutlierD = self.__commonU.getInstanceModelOutlierInfo(dataContainer)
+            #
+            # ("OutlierValue", "compId, seqId, outlierType, description, reported, reference, uncertaintyValue, uncertaintyType")
+            #
+            logger.debug("Length instanceModelOutlierD %d", len(instanceModelOutlierD))
+            #
+            # (modelId, asymId), []).append((compId, int(seqId), "RSCC_OUTLIER", tS)
+            for (modelId, asymId, hasSeq), pTupL in instanceModelOutlierD.items():
+                fTypeL = sorted(set([pTup.outlierType for pTup in pTupL]))
+                jj = 1
+                for fType in fTypeL:
+                    if (asymId not in asymIdD) or (asymId not in asymAuthIdD):
+                        continue
+                    entityId = asymIdD[asymId]
+                    authAsymId = asymAuthIdD[asymId]
+                    #
+                    cObj.setValue(ii + 1, "ordinal", ii)
+                    cObj.setValue(entryId, "entry_id", ii)
+                    cObj.setValue(entityId, "entity_id", ii)
+                    cObj.setValue(asymId, "asym_id", ii)
+                    cObj.setValue(authAsymId, "auth_asym_id", ii)
+
+                    #
+                    cObj.setValue(fType, "type", ii)
+                    tN = typeMapD[fType] if fType in typeMapD else fType
+                    cObj.setValue(tN, "name", ii)
+                    #
+                    tFn = "%s_%d" % (fType, jj)
+                    cObj.setValue(tFn, "feature_id", ii)
+                    #
+                    if hasSeq:
+                        descriptionS = tN + " in instance %s model %s" % (asymId, modelId)
+                        cObj.setValue(";".join([pTup.compId for pTup in pTupL if pTup.outlierType == fType]), "feature_positions_beg_comp_id", ii)
+                        cObj.setValue(";".join([str(pTup.seqId) for pTup in pTupL if pTup.outlierType == fType]), "feature_positions_beg_seq_id", ii)
+
+                    else:
+                        cObj.setValue(pTupL[0].compId, "comp_id", ii)
+                        descriptionS = tN + " in %s instance %s model %s" % (pTupL[0].compId, asymId, modelId)
+                        cObj.setValue(";".join([pTup.compId if pTup.compId else "?" for pTup in pTupL if pTup.outlierType == fType]), "feature_value_comp_id", ii)
+                        cObj.setValue(";".join([pTup.description if pTup.description else "?" for pTup in pTupL if pTup.outlierType == fType]), "feature_value_details", ii)
+                        cObj.setValue(";".join([pTup.reported if pTup.reported else "?" for pTup in pTupL if pTup.outlierType == fType]), "feature_value_reported", ii)
+                        cObj.setValue(";".join([pTup.reference if pTup.reference else "?" for pTup in pTupL if pTup.outlierType == fType]), "feature_value_reference", ii)
+                        cObj.setValue(
+                            ";".join([pTup.uncertaintyValue if pTup.uncertaintyValue else "?" for pTup in pTupL if pTup.outlierType == fType]),
+                            "feature_value_uncertainty_estimate",
+                            ii,
+                        )
+                        cObj.setValue(
+                            ";".join([pTup.uncertaintyType if pTup.uncertaintyType else "?" for pTup in pTupL if pTup.outlierType == fType]),
+                            "feature_value_uncertainty_estimate_type",
+                            ii,
+                        )
+                    cObj.setValue("PDB entity", "reference_scheme", ii)
+                    cObj.setValue(descriptionS, "description", ii)
+                    cObj.setValue("PDB", "provenance_source", ii)
+                    cObj.setValue("V1.0", "assignment_version", ii)
+                    #
+                    jj += 1
+                    ii += 1
+            #
+            ##
+            return True
+        except Exception as e:
+            logger.exception("For %s %r failing with %s", dataContainer.getName(), catName, str(e))
+        return False
+
+    # --- JDW
+    def buildInstanceValidationFeatureSummaryPrev(self, dataContainer, catName, **kwargs):
+        """Build category rcsb_entity_instance_validation_feature_summary
+
+        Example:
+
+        loop_
+        _rcsb_entity_instance_validation_feature_summary.ordinal
+        _rcsb_entity_instance_validation_feature_summary.entry_id
+        _rcsb_entity_instance_validation_feature_summary.entity_id
+        _rcsb_entity_instance_validation_feature_summary.asym_id
+        _rcsb_entity_instance_validation_feature_summary.auth_asym_id
+        #
+        _rcsb_entity_instance_validation_feature_summary.type
+        _rcsb_entity_instance_validation_feature_summary.count
+        _rcsb_entity_instance_validation_feature_summary.coverage
+        # ...
+        """
+        logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs)
+        try:
+            if catName != "rcsb_entity_instance_validation_feature_summary":
+                return False
+            if not dataContainer.exists("rcsb_entity_instance_validation_feature") and not dataContainer.exists("entry"):
+                return False
+
+            if not dataContainer.exists(catName):
+                dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName)))
+            #
+            eObj = dataContainer.getObj("entry")
+            entryId = eObj.getValue("id", 0)
+            #
+            sObj = dataContainer.getObj(catName)
+            fObj = dataContainer.getObj("rcsb_entity_instance_validation_feature")
+            #
+            instIdMapD = self.__commonU.getInstanceIdMap(dataContainer)
+            instEntityD = self.__commonU.getInstanceEntityMap(dataContainer)
+            entityPolymerLengthD = self.__commonU.getPolymerEntityLengthsEnumerated(dataContainer)
+            asymAuthD = self.__commonU.getAsymAuthIdMap(dataContainer)
+
+            fCountD = OrderedDict()
+            fMonomerCountD = OrderedDict()
+            fInstanceCountD = OrderedDict()
+            for ii in range(fObj.getRowCount()):
+                asymId = fObj.getValue("asym_id", ii)
+                # ---- initialize counts
+                # fCountD = self.__initializeInstanceValidationFeatureType(dataContainer, asymId, fCountD, countType="set")
+                # fMonomerCountD = self.__initializeInstanceValidationFeatureType(dataContainer, asymId, fMonomerCountD, countType="list")
+                # fInstanceCountD = self.__initializeInstanceValidationFeatureType(dataContainer, asymId, fInstanceCountD, countType="list")
+                # ----
+                fType = fObj.getValue("type", ii)
+                fId = fObj.getValue("feature_id", ii)
+                fCountD.setdefault(asymId, {}).setdefault(fType, set()).add(fId)
+                #
+                tbegS = fObj.getValueOrDefault("feature_positions_beg_seq_id", ii, defaultValue=None)
+                tendS = fObj.getValueOrDefault("feature_positions_end_seq_id", ii, defaultValue=None)
+                if fObj.hasAttribute("feature_positions_beg_seq_id") and tbegS is not None and fObj.hasAttribute("feature_positions_end_seq_id") and tendS is not None:
+                    begSeqIdL = str(fObj.getValue("feature_positions_beg_seq_id", ii)).split(";")
+                    endSeqIdL = str(fObj.getValue("feature_positions_end_seq_id", ii)).split(";")
+                    monCount = 0
+                    for begSeqId, endSeqId in zip(begSeqIdL, endSeqIdL):
+                        try:
+                            monCount += abs(int(endSeqId) - int(begSeqId) + 1)
+                        except Exception:
+                            logger.warning(
+                                "In %s fType %r fId %r bad sequence range begSeqIdL %r endSeqIdL %r tbegS %r tendS %r",
+                                dataContainer.getName(),
+                                fType,
+                                fId,
+                                begSeqIdL,
+                                endSeqIdL,
+                                tbegS,
+                                tendS,
+                            )
+                    fMonomerCountD.setdefault(asymId, {}).setdefault(fType, []).append(monCount)
+                elif fObj.hasAttribute("feature_positions_beg_seq_id") and tbegS:
+                    seqIdL = str(fObj.getValue("feature_positions_beg_seq_id", ii)).split(";")
+                    fMonomerCountD.setdefault(asymId, {}).setdefault(fType, []).append(len(seqIdL))
+
+                tS = fObj.getValueOrDefault("feature_value_details", ii, defaultValue=None)
+                if fObj.hasAttribute("feature_value_details") and tS is not None:
+                    dL = str(fObj.getValue("feature_value_details", ii)).split(";")
+                    fInstanceCountD.setdefault(asymId, {}).setdefault(fType, []).append(len(dL))
+            #
+            # logger.debug("%s fCountD %r", entryId, fCountD)
+            #
+            ii = 0
+            for asymId, fTypeD in fCountD.items():
+                entityId = instEntityD[asymId]
+                authAsymId = asymAuthD[asymId]
+                for fType, fS in fTypeD.items():
+                    #
+                    sObj.setValue(ii + 1, "ordinal", ii)
+                    sObj.setValue(entryId, "entry_id", ii)
+                    sObj.setValue(entityId, "entity_id", ii)
+                    sObj.setValue(asymId, "asym_id", ii)
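+                    # comp_id below is populated only when the instance maps to a
+                    # single chemical component (e.g. a nonpolymer ligand); polymer
+                    # instances leave it unset.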
instIdMapD[asymId]["comp_id"]: + sObj.setValue(instIdMapD[asymId]["comp_id"], "comp_id", ii) + sObj.setValue(authAsymId, "auth_asym_id", ii) + sObj.setValue(fType, "type", ii) + fracC = 0.0 + # + if asymId in fMonomerCountD and fType in fMonomerCountD[asymId] and fMonomerCountD[asymId][fType]: + fCount = sum(fMonomerCountD[asymId][fType]) + if asymId in fMonomerCountD and fType in fMonomerCountD[asymId] and entityId in entityPolymerLengthD: + fracC = float(sum(fMonomerCountD[asymId][fType])) / float(entityPolymerLengthD[entityId]) + elif asymId in fInstanceCountD and fType in fInstanceCountD[asymId] and fInstanceCountD[asymId][fType]: + fCount = sum(fInstanceCountD[asymId][fType]) + else: + fCount = len(fS) + # + sObj.setValue(fCount, "count", ii) + sObj.setValue(round(fracC, 5), "coverage", ii) + # + ii += 1 + + except Exception as e: + logger.exception("Failing with %s", str(e)) + return True + + def __initializeInstanceValidationFeatureType(self, dataContainer, asymId, fCountD, countType="set"): + instTypeD = self.__commonU.getInstanceTypes(dataContainer) + eType = instTypeD[asymId] + eTupL = [] + # rcsb_entity_instance_validation_feature_summary.type + if eType == "polymer": + eTupL = self.__dApi.getEnumListWithDetail("rcsb_entity_instance_validation_feature_summary", "type") + elif eType in ["non-polymer", "water"]: + eTupL = self.__dApi.getEnumListWithDetail("rcsb_entity_instance_validation_feature_summary", "type") + elif eType == "branched": + eTupL = self.__dApi.getEnumListWithDetail("rcsb_entity_instance_validation_feature_summary", "type") + else: + logger.error("%r asymId %r eType %r", dataContainer.getName(), asymId, eType) + # + fTypeL = sorted([tup[0] for tup in eTupL]) + # + for fType in fTypeL: + if countType == "set": + fCountD.setdefault(asymId, {}).setdefault(fType, set()) + else: + fCountD.setdefault(asymId, {}).setdefault(fType, []) + # + return fCountD + + # --- JDW + def __getInstanceFeatureTypes(self, eType): + # + vTupL = self.__dApi.getEnumListWithDetail("rcsb_entity_instance_validation_feature_summary", "type") + if eType == "polymer": + eTupL = self.__dApi.getEnumListWithDetail("rcsb_polymer_instance_feature_summary", "type") + elif eType in ["non-polymer", "water"]: + eTupL = self.__dApi.getEnumListWithDetail("rcsb_nonpolymer_instance_feature_summary", "type") + elif eType == "branched": + eTupL = self.__dApi.getEnumListWithDetail("rcsb_branched_instance_feature_summary", "type") + else: + logger.error("Unexpected eType %r -- no feature types provided", eType) + eTupL = [] + # Distinct elements in the instance specific categories. (remove validation types) + vTypeL = sorted([tup[0] for tup in vTupL]) + iTypeL = sorted([tup[0] for tup in eTupL]) + fTypeL = sorted(set(iTypeL) - set(vTypeL)) + return fTypeL + + def __getInstanceValidationFeatureTypes(self, eType): + # + vTupL = self.__dApi.getEnumListWithDetail("rcsb_entity_instance_validation_feature_summary", "type") + if eType == "polymer": + eTupL = self.__dApi.getEnumListWithDetail("rcsb_polymer_instance_feature_summary", "type") + elif eType in ["non-polymer", "water"]: + eTupL = self.__dApi.getEnumListWithDetail("rcsb_nonpolymer_instance_feature_summary", "type") + elif eType == "branched": + eTupL = self.__dApi.getEnumListWithDetail("rcsb_branched_instance_feature_summary", "type") + else: + logger.error("Unexpected eType %r -- no feature types provided", eType) + eTupL = [] + # Common elements in the instance specific categories. 
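+ # Illustrative (hypothetical) values: vTypeL = ["BOND_OUTLIER", "RSRZ_OUTLIER"] with iTypeL = ["CATH", "RSRZ_OUTLIER"] would yield fTypeL = ["RSRZ_OUTLIER"] below.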
+ vTypeL = sorted([tup[0] for tup in vTupL]) + iTypeL = sorted([tup[0] for tup in eTupL]) + fTypeL = sorted(set(vTypeL).intersection(iTypeL)) + return fTypeL + + # --- JDW + def buildEntityInstanceFeatureSummary(self, dataContainer, catName, **kwargs): + """Build category rcsb_entity_instance_feature_summary (UPDATED) + + Example: + + loop_ + _rcsb_entity_instance_feature_summary.ordinal + _rcsb_entity_instance_feature_summary.entry_id + _rcsb_entity_instance_feature_summary.entity_id + _rcsb_entity_instance_feature_summary.asym_id + _rcsb_entity_instance_feature_summary.auth_asym_id + # + _rcsb_entity_instance_feature_summary.type + _rcsb_entity_instance_feature_summary.count + _rcsb_entity_instance_feature_summary.coverage + # ... + """ + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + try: + if catName != "rcsb_entity_instance_feature_summary": + return False + if not dataContainer.exists("rcsb_entity_instance_feature") and not dataContainer.exists("entry"): + return False + + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + eObj = dataContainer.getObj("entry") + entryId = eObj.getValue("id", 0) + # + sObj = dataContainer.getObj(catName) + fObj = dataContainer.getObj("rcsb_entity_instance_feature") + # + instEntityD = self.__commonU.getInstanceEntityMap(dataContainer) + entityPolymerLengthD = self.__commonU.getPolymerEntityLengthsEnumerated(dataContainer) + # typeList = self.__dApi.getEnumList("rcsb_entity_instance_feature_summary", "type", sortFlag=True) + asymAuthD = self.__commonU.getAsymAuthIdMap(dataContainer) + instIdMapD = self.__commonU.getInstanceIdMap(dataContainer) + instTypeD = self.__commonU.getInstanceTypes(dataContainer) + # + fCountD = OrderedDict() + fValuesD = OrderedDict() + fMonomerCountD = OrderedDict() + for ii in range(fObj.getRowCount()): + asymId = fObj.getValue("asym_id", ii) + # ---- initialize counts + # fCountD = self.__initializeInstanceFeatureType(dataContainer, asymId, fCountD, countType="set") + # fMonomerCountD = self.__initializeInstanceFeatureType(dataContainer, asymId, fMonomerCountD, countType="list") + # ---- + fType = fObj.getValue("type", ii) + fId = fObj.getValue("feature_id", ii) + fCountD.setdefault(asymId, {}).setdefault(fType, set()).add(fId) + # + tbegS = fObj.getValueOrDefault("feature_positions_beg_seq_id", ii, defaultValue=None) + tendS = fObj.getValueOrDefault("feature_positions_end_seq_id", ii, defaultValue=None) + if fObj.hasAttribute("feature_positions_beg_seq_id") and tbegS is not None and fObj.hasAttribute("feature_positions_end_seq_id") and tendS is not None: + begSeqIdL = str(fObj.getValue("feature_positions_beg_seq_id", ii)).split(";") + endSeqIdL = str(fObj.getValue("feature_positions_end_seq_id", ii)).split(";") + monCount = 0 + for begSeqId, endSeqId in zip(begSeqIdL, endSeqIdL): + try: + monCount += abs(int(endSeqId) - int(begSeqId) + 1) + except Exception: + logger.warning( + "%s fType %r fId %r bad sequence begSeqIdL %r endSeqIdL %r tbegS %r tendS %r", + dataContainer.getName(), + fType, + fId, + begSeqIdL, + endSeqIdL, + tbegS, + tendS, + ) + + fMonomerCountD.setdefault(asymId, {}).setdefault(fType, []).append(monCount) + elif fObj.hasAttribute("feature_positions_beg_seq_id") and tbegS: + seqIdL = str(fObj.getValue("feature_positions_beg_seq_id", ii)).split(";") + fMonomerCountD.setdefault(asymId, {}).setdefault(fType, []).append(len(seqIdL)) + # JDW + elif 
fObj.hasAttribute("feature_value_reported"): + tValue = fObj.getValueOrDefault("feature_value_reported", ii, defaultValue=None) + if tValue: + try: + tvL = [float(t) for t in tValue.split(";")] + fValuesD.setdefault(asymId, {}).setdefault(fType, []).extend(tvL) + except Exception: + pass + + # + logger.debug("%s fCountD %r", entryId, fCountD) + # + + ii = 0 + for asymId, entityId in instEntityD.items(): + eType = instTypeD[asymId] + authAsymId = asymAuthD[asymId] + fTypeL = self.__getInstanceFeatureTypes(eType) + # All entity type specific features + for fType in fTypeL: + sObj.setValue(ii + 1, "ordinal", ii) + sObj.setValue(entryId, "entry_id", ii) + sObj.setValue(entityId, "entity_id", ii) + sObj.setValue(asymId, "asym_id", ii) + sObj.setValue(authAsymId, "auth_asym_id", ii) + # add comp + if asymId in instIdMapD and "comp_id" in instIdMapD[asymId] and instIdMapD[asymId]["comp_id"]: + sObj.setValue(instIdMapD[asymId]["comp_id"], "comp_id", ii) + sObj.setValue(fType, "type", ii) + fracC = 0.0 + minL = maxL = 0 + if asymId in fMonomerCountD and fType in fMonomerCountD[asymId]: + if fType.startswith("UNOBSERVED"): + fCount = sum(fMonomerCountD[asymId][fType]) + else: + fCount = len(fCountD[asymId][fType]) + + if entityId in entityPolymerLengthD: + fracC = float(sum(fMonomerCountD[asymId][fType])) / float(entityPolymerLengthD[entityId]) + + if fType in ["CATH", "SCOP", "HELIX_P", "SHEET", "UNASSIGNED_SEC_STRUCT", "UNOBSERVED_RESIDUE_XYZ", "ZERO_OCCUPANCY_RESIDUE_XYZ"]: + minL = min(fMonomerCountD[asymId][fType]) + maxL = max(fMonomerCountD[asymId][fType]) + + elif asymId in fCountD and fType in fCountD[asymId] and fCountD[asymId][fType]: + fCount = len(fCountD[asymId][fType]) + else: + fCount = 0 + # + minV = maxV = 0 + if asymId in fValuesD and fType in fValuesD[asymId]: + if fType in ["HAS_COVALENT_LINKAGE", "HAS_METAL_COORDINATION_LINKAGE"]: + try: + minV = min(fValuesD[asymId][fType]) + maxV = max(fValuesD[asymId][fType]) + except Exception: + pass + + sObj.setValue(fCount, "count", ii) + sObj.setValue(round(fracC, 5), "coverage", ii) + if minL is not None: + sObj.setValue(minL, "minimum_length", ii) + sObj.setValue(maxL, "maximum_length", ii) + if minV is not None: + sObj.setValue(minV, "minimum_value", ii) + sObj.setValue(maxV, "maximum_value", ii) + # + ii += 1 + except Exception as e: + logger.exception("Failing for %s with %s", dataContainer.getName(), str(e)) + return True + + def buildInstanceValidationFeatureSummary(self, dataContainer, catName, **kwargs): + """Build category rcsb_entity_instance_validation_feature_summary + + Example: + + loop_ + _rcsb_entity_instance_validation_feature_summary.ordinal + _rcsb_entity_instance_validation_feature_summary.entry_id + _rcsb_entity_instance_validation_feature_summary.entity_id + _rcsb_entity_instance_validation_feature_summary.asym_id + _rcsb_entity_instance_validation_feature_summary.auth_asym_id + _rcsb_entity_instance_validation_feature_summary.type + _rcsb_entity_instance_validation_feature_summary.count + _rcsb_entity_instance_validation_feature_summary.coverage + # ... 
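+ + Counts are accumulated at several granularities (monomer ranges, value details, and distinct feature ids) as described in the implementation below.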
+ """ + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + try: + if catName != "rcsb_entity_instance_validation_feature_summary": + return False + if not dataContainer.exists("rcsb_entity_instance_validation_feature") and not dataContainer.exists("entry"): + return False + + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + eObj = dataContainer.getObj("entry") + entryId = eObj.getValue("id", 0) + # + sObj = dataContainer.getObj(catName) + fObj = dataContainer.getObj("rcsb_entity_instance_validation_feature") + # + instIdMapD = self.__commonU.getInstanceIdMap(dataContainer) + instEntityD = self.__commonU.getInstanceEntityMap(dataContainer) + entityPolymerLengthD = self.__commonU.getPolymerEntityLengthsEnumerated(dataContainer) + asymAuthD = self.__commonU.getAsymAuthIdMap(dataContainer) + instTypeD = self.__commonU.getInstanceTypes(dataContainer) + + fCountD = OrderedDict() + fMonomerCountD = OrderedDict() + fInstanceCountD = OrderedDict() + for ii in range(fObj.getRowCount()): + asymId = fObj.getValue("asym_id", ii) + fType = fObj.getValue("type", ii) + fId = fObj.getValue("feature_id", ii) + fCountD.setdefault(asymId, {}).setdefault(fType, set()).add(fId) + # + tbegS = fObj.getValueOrDefault("feature_positions_beg_seq_id", ii, defaultValue=None) + tendS = fObj.getValueOrDefault("feature_positions_end_seq_id", ii, defaultValue=None) + if fObj.hasAttribute("feature_positions_beg_seq_id") and tbegS is not None and fObj.hasAttribute("feature_positions_end_seq_id") and tendS is not None: + begSeqIdL = str(fObj.getValue("feature_positions_beg_seq_id", ii)).split(";") + endSeqIdL = str(fObj.getValue("feature_positions_end_seq_id", ii)).split(";") + monCount = 0 + for begSeqId, endSeqId in zip(begSeqIdL, endSeqIdL): + try: + monCount += abs(int(endSeqId) - int(begSeqId) + 1) + except Exception: + logger.warning( + "In %s fType %r fId %r bad sequence range begSeqIdL %r endSeqIdL %r tbegS %r tendS %r", + dataContainer.getName(), + fType, + fId, + begSeqIdL, + endSeqIdL, + tbegS, + tendS, + ) + fMonomerCountD.setdefault(asymId, {}).setdefault(fType, []).append(monCount) + elif fObj.hasAttribute("feature_positions_beg_seq_id") and tbegS: + seqIdL = str(fObj.getValue("feature_positions_beg_seq_id", ii)).split(";") + fMonomerCountD.setdefault(asymId, {}).setdefault(fType, []).append(len(seqIdL)) + + tS = fObj.getValueOrDefault("feature_value_details", ii, defaultValue=None) + if fObj.hasAttribute("feature_value_details") and tS is not None: + dL = str(fObj.getValue("feature_value_details", ii)).split(";") + fInstanceCountD.setdefault(asymId, {}).setdefault(fType, []).append(len(dL)) + # + ii = 0 + # Summarize all instances - + for asymId, entityId in instEntityD.items(): + eType = instTypeD[asymId] + authAsymId = asymAuthD[asymId] + fTypeL = self.__getInstanceValidationFeatureTypes(eType) + # All entity type specific features + for fType in fTypeL: + # + sObj.setValue(ii + 1, "ordinal", ii) + sObj.setValue(entryId, "entry_id", ii) + sObj.setValue(entityId, "entity_id", ii) + sObj.setValue(asymId, "asym_id", ii) + if asymId in instIdMapD and "comp_id" in instIdMapD[asymId] and instIdMapD[asymId]["comp_id"]: + sObj.setValue(instIdMapD[asymId]["comp_id"], "comp_id", ii) + sObj.setValue(authAsymId, "auth_asym_id", ii) + sObj.setValue(fType, "type", ii) + # + # Sum features of different granularities + # + fracC = 0.0 + if asymId in fMonomerCountD and fType in 
fMonomerCountD[asymId] and fMonomerCountD[asymId][fType]: + fCount = sum(fMonomerCountD[asymId][fType]) + if asymId in fMonomerCountD and fType in fMonomerCountD[asymId] and entityId in entityPolymerLengthD: + fracC = float(sum(fMonomerCountD[asymId][fType])) / float(entityPolymerLengthD[entityId]) + elif asymId in fInstanceCountD and fType in fInstanceCountD[asymId] and fInstanceCountD[asymId][fType]: + fCount = sum(fInstanceCountD[asymId][fType]) + elif asymId in fCountD and fType in fCountD[asymId] and fCountD[asymId][fType]: + fCount = len(fCountD[asymId][fType]) + else: + # default zero value + fCount = 0 + # + sObj.setValue(fCount, "count", ii) + sObj.setValue(round(fracC, 5), "coverage", ii) + # + ii += 1 + + except Exception as e: + logger.exception("Failing with %s", str(e)) + return True + + # + def buildEntityInstanceAnnotations(self, dataContainer, catName, **kwargs): + """Build category rcsb_entity_instance_annotation ... + + Example: + loop_ + _rcsb_entity_instance_annotation.ordinal + _rcsb_entity_instance_annotation.entry_id + _rcsb_entity_instance_annotation.entity_id + _rcsb_entity_instance_annotation.asym_id + _rcsb_entity_instance_annotation.auth_asym_id + _rcsb_entity_instance_annotation.annotation_id + _rcsb_entity_instance_annotation.type + _rcsb_entity_instance_annotation.name + _rcsb_entity_instance_annotation.description + _rcsb_entity_instance_annotation.annotation_lineage_id + _rcsb_entity_instance_annotation.annotation_lineage_name + _rcsb_entity_instance_annotation.annotation_lineage_depth + _rcsb_entity_instance_annotation.reference_scheme + _rcsb_entity_instance_annotation.provenance_source + _rcsb_entity_instance_annotation.assignment_version + + """ + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + try: + if catName != "rcsb_entity_instance_annotation": + return False + # Exit if source categories are missing + if not dataContainer.exists("entry"): + return False + # + # Create the new target category + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + cObj = dataContainer.getObj(catName) + # + rP = kwargs.get("resourceProvider") + + eObj = dataContainer.getObj("entry") + entryId = eObj.getValue("id", 0) + # + asymIdD = self.__commonU.getInstanceEntityMap(dataContainer) + asymAuthIdD = self.__commonU.getAsymAuthIdMap(dataContainer) + # asymIdRangesD = self.__commonU.getInstancePolymerRanges(dataContainer) + # pAuthAsymD = self.__commonU.getPolymerIdMap(dataContainer) + instTypeD = self.__commonU.getInstanceTypes(dataContainer) + # --------------- + # Add CATH assignments + cathU = rP.getResource("CathProvider instance") if rP else None + ii = cObj.getRowCount() + # + for asymId, authAsymId in asymAuthIdD.items(): + if instTypeD[asymId] not in ["polymer", "branched"]: + continue + entityId = asymIdD[asymId] + dL = cathU.getCathResidueRanges(entryId.lower(), authAsymId) + logger.debug("%s asymId %s authAsymId %s dL %r", entryId, asymId, authAsymId, dL) + vL = cathU.getCathVersions(entryId.lower(), authAsymId) + for (cathId, domId, _, _, _) in dL: + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(asymId, "asym_id", ii) + cObj.setValue(authAsymId, "auth_asym_id", ii) + cObj.setValue("CATH", "type", ii) + # + cObj.setValue(str(cathId), "annotation_id", ii) + # cObj.setValue(str(domId), "annotation_id", ii) + # cObj.setValue(cathId, "name", ii) 
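+ # Store the human-readable CATH domain name; lineage ids and names are serialized below as semicolon-delimited lists with matching 1-based depth values.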
+ cObj.setValue(cathU.getCathName(cathId), "name", ii) + # + cObj.setValue(";".join(cathU.getNameLineage(cathId)), "annotation_lineage_name", ii) + idLinL = cathU.getIdLineage(cathId) + cObj.setValue(";".join(idLinL), "annotation_lineage_id", ii) + cObj.setValue(";".join([str(jj) for jj in range(1, len(idLinL) + 1)]), "annotation_lineage_depth", ii) + # + cObj.setValue("CATH", "provenance_source", ii) + cObj.setValue(vL[0], "assignment_version", ii) + # + ii += 1 + # ------------ + # Add SCOP assignments + scopU = rP.getResource("ScopProvider instance") if rP else None + for asymId, authAsymId in asymAuthIdD.items(): + if instTypeD[asymId] not in ["polymer", "branched"]: + continue + entityId = asymIdD[asymId] + dL = scopU.getScopResidueRanges(entryId.lower(), authAsymId) + version = scopU.getScopVersion() + for (sunId, domId, _, _, _, _) in dL: + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(asymId, "asym_id", ii) + cObj.setValue(authAsymId, "auth_asym_id", ii) + cObj.setValue("SCOP", "type", ii) + # + # cObj.setValue(str(sunId), "domain_id", ii) + cObj.setValue(domId, "annotation_id", ii) + cObj.setValue(scopU.getScopName(sunId), "name", ii) + # + tL = [t if t is not None else "" for t in scopU.getNameLineage(sunId)] + cObj.setValue(";".join(tL), "annotation_lineage_name", ii) + idLinL = scopU.getIdLineage(sunId) + cObj.setValue(";".join([str(t) for t in idLinL]), "annotation_lineage_id", ii) + cObj.setValue(";".join([str(jj) for jj in range(1, len(idLinL) + 1)]), "annotation_lineage_depth", ii) + # + cObj.setValue("SCOPe", "provenance_source", ii) + cObj.setValue(version, "assignment_version", ii) + # + ii += 1 + # ------------ + # Add covalent attachment property + npbD = self.__commonU.getBoundNonpolymersByInstance(dataContainer) + jj = 1 + for asymId, rTupL in npbD.items(): + for rTup in rTupL: + if rTup.connectType in ["covalent bond"]: + fType = "HAS_COVALENT_LINKAGE" + fId = "COVALENT_LINKAGE_%d" % jj + + elif rTup.connectType in ["metal coordination"]: + fType = "HAS_METAL_COORDINATION_LINKAGE" + fId = "METAL_COORDINATION_LINKAGE_%d" % jj + else: + continue + + entityId = asymIdD[asymId] + authAsymId = asymAuthIdD[asymId] + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(entryId, "entry_id", ii) + cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(asymId, "asym_id", ii) + cObj.setValue(authAsymId, "auth_asym_id", ii) + cObj.setValue(rTup.targetCompId, "comp_id", ii) + cObj.setValue(fId, "annotation_id", ii) + cObj.setValue(fType, "type", ii) + # + # ("targetCompId", "connectType", "partnerCompId", "partnerAsymId", "partnerEntityType", "bondDistance", "bondOrder") + cObj.setValue( + "%s has %s with %s instance %s in model 1" % (rTup.targetCompId, rTup.connectType, rTup.partnerEntityType, rTup.partnerAsymId), + "description", + ii, + ) + + cObj.setValue("PDB", "provenance_source", ii) + cObj.setValue("V1.0", "assignment_version", ii) + # + ii += 1 + jj += 1 + + return True + except Exception as e: + logger.exception("%s %s failing with %s", dataContainer.getName(), catName, str(e)) + return False + + def buildInstanceValidationScores(self, dataContainer, catName, **kwargs): + """Build category rcsb_nonpolymer_instance_validation_score ... 
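+ + Scores standardize the density-fit (RSR, RSCC) and geometry (Mogul bond/angle RMSZ) measures against reference statistics; see __calculateFitScore() and __calculateGeometryScore() below.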
+ + Example: + loop_ + _rcsb_nonpolymer_instance_validation_score.ordinal + _rcsb_nonpolymer_instance_validation_score.entry_id + _rcsb_nonpolymer_instance_validation_score.entity_id + _rcsb_nonpolymer_instance_validation_score.asym_id + _rcsb_nonpolymer_instance_validation_score.auth_asym_id + _rcsb_nonpolymer_instance_validation_score.comp_id + _rcsb_nonpolymer_instance_validation_score.model_id + _rcsb_nonpolymer_instance_validation_score.type + _rcsb_nonpolymer_instance_validation_score.mogul_angles_RMSZ + _rcsb_nonpolymer_instance_validation_score.mogul_bonds_RMSZ + _rcsb_nonpolymer_instance_validation_score.RSR + _rcsb_nonpolymer_instance_validation_score.RSCC + _rcsb_nonpolymer_instance_validation_score.score_model_fit + _rcsb_nonpolymer_instance_validation_score.score_model_geometry + _rcsb_nonpolymer_instance_validation_score.ranking_model_fit + _rcsb_nonpolymer_instance_validation_score.ranking_model_geometry + _rcsb_nonpolymer_instance_validation_score.is_best_instance + _rcsb_nonpolymer_instance_validation_score.is_subject_of_investigation + # + """ + logger.debug("Starting with %s %r %r", dataContainer.getName(), catName, kwargs) + startTime = time.time() + try: + if catName != "rcsb_nonpolymer_instance_validation_score": + return False + if not dataContainer.exists("entry"): + return False + # + eObj = dataContainer.getObj("entry") + entryId = eObj.getValue("id", 0) + # + # Create the new target category + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + cObj = dataContainer.getObj(catName) + ii = cObj.getRowCount() + # + asymIdD = self.__commonU.getInstanceEntityMap(dataContainer) + asymAuthIdD = self.__commonU.getAsymAuthIdMap(dataContainer) + # + instanceModelValidationD = self.__commonU.getInstanceNonpolymerValidationInfo(dataContainer) + # + # NonpolymerValidationFields = ("rsr", "rscc", "mogul_bonds_rmsz", "mogul_angles_rmsz", "missing_heavy_atom_count") + # + logger.debug("Length instanceModelValidationD %d", len(instanceModelValidationD)) + # + ccTargets = self.__commonU.getTargetComponents(dataContainer) + # + meanD, stdD, loadingD = self.__rlsP.getParameterStatistics() + excludeList = self.__rlsP.getLigandExcludeList() + rankD = {} + scoreD = {} + # calculate scores and ranks and track the best fit-ranked instance for each component + for (modelId, asymId, compId), vTup in instanceModelValidationD.items(): + if (asymId not in asymIdD) or (asymId not in asymAuthIdD): + continue + numHeavyAtoms = self.__ccP.getAtomCountHeavy(compId) + if not numHeavyAtoms: + continue + completeness = float(numHeavyAtoms - vTup.missing_heavy_atom_count) / float(numHeavyAtoms) + logger.debug("compId %s numHeavyAtoms %d completeness %0.2f", compId, numHeavyAtoms, completeness) + # + fitScore, fitRanking = self.__calculateFitScore(vTup.rsr, vTup.rscc, completeness, meanD, stdD, loadingD) + geoScore, geoRanking = self.__calculateGeometryScore(vTup.mogul_bonds_rmsz, vTup.mogul_angles_rmsz, meanD, stdD, loadingD) + # + rankD[compId] = (fitRanking, asymId) if (compId not in rankD or fitRanking > rankD[compId][0]) else rankD[compId] + + scoreD[(modelId, asymId, compId)] = (fitScore, fitRanking, geoScore, geoRanking) + # + for (modelId, asymId, compId), vTup in instanceModelValidationD.items(): + if (modelId, asymId, compId) not in scoreD: + continue + # + entityId = asymIdD[asymId] + authAsymId = asymAuthIdD[asymId] + # + cObj.setValue(ii + 1, "ordinal", ii) + cObj.setValue(modelId, "model_id", ii) + cObj.setValue(entryId, "entry_id", ii) + 
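# Populate the identifier items first, then the raw validation measures, then the derived scores and rankings from scoreD (fitScore, fitRanking, geoScore, geoRanking). +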
cObj.setValue(entityId, "entity_id", ii) + cObj.setValue(asymId, "asym_id", ii) + cObj.setValue(authAsymId, "auth_asym_id", ii) + cObj.setValue(compId, "comp_id", ii) + cObj.setValue("RCSB_LIGAND_QUALITY_2021", "type", ii) + # + cObj.setValue(vTup.rsr, "RSR", ii) + cObj.setValue(vTup.rscc, "RSCC", ii) + cObj.setValue(vTup.mogul_angles_rmsz, "mogul_angles_RMSZ", ii) + cObj.setValue(vTup.mogul_bonds_rmsz, "mogul_bonds_RMSZ", ii) + # + sTup = scoreD[(modelId, asymId, compId)] + cObj.setValue(sTup[0], "score_model_fit", ii) + cObj.setValue(sTup[1], "ranking_model_fit", ii) + cObj.setValue(sTup[2], "score_model_geometry", ii) + cObj.setValue(sTup[3], "ranking_model_geometry", ii) + isBest = "Y" if rankD[compId][1] == asymId else "N" + cObj.setValue(isBest, "is_best_instance", ii) + # + isTarget = "N" + if compId in ccTargets: + isTarget = "Y" + elif compId in excludeList: + isTarget = "N" + elif self.__ccP.getFormulaWeight(compId) and self.__ccP.getFormulaWeight(compId) > 150.0: + isTarget = "Y" + cObj.setValue(isTarget, "is_subject_of_investigation", ii) + # + ii += 1 + # + ## + endTime = time.time() + logger.debug("Completed at %s (%.4f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - startTime) + return True + except Exception as e: + logger.exception("For %s %r failing with %s", dataContainer.getName(), catName, str(e)) + return False + + def __calculateFitScore(self, rsr, rscc, completeness, meanD, stdD, loadingD): + fitScore = None + fitRanking = 0.0 + try: + if rsr and rscc: + if completeness < 1.0: + rsr = rsr + 0.08235 * (1.0 - completeness) + rscc = rscc - 0.09652 * (1.0 - completeness) + fitScore = ((rsr - meanD["rsr"]) / stdD["rsr"]) * loadingD["rsr"] + ((rscc - meanD["rscc"]) / stdD["rscc"]) * loadingD["rscc"] + fitRanking = self.__rlsP.getFitScoreRanking(fitScore) + except Exception as e: + logger.exception("Failing for rsr %r rscc %r with %s", rsr, rscc, str(e)) + return fitScore, fitRanking + + def __calculateGeometryScore(self, bondsRmsZ, anglesRmsZ, meanD, stdD, loadingD): + geoScore = None + geoRanking = 0.0 + try: + if bondsRmsZ and anglesRmsZ: + geoScore = ((bondsRmsZ - meanD["mogul_bonds_rmsz"]) / stdD["mogul_bonds_rmsz"]) * loadingD["mogul_bonds_rmsz"] + ( + (anglesRmsZ - meanD["mogul_angles_rmsz"]) / stdD["mogul_angles_rmsz"] + ) * loadingD["mogul_angles_rmsz"] + geoRanking = self.__rlsP.getGeometryScoreRanking(geoScore) + except Exception as e: + logger.exception("Failing for bondsRmsZ %r anglesRmsZ %r with %r", bondsRmsZ, anglesRmsZ, str(e)) + + return geoScore, geoRanking diff --git a/rcsb/utils/dictionary/DictMethodEntryHelper.py b/rcsb/utils/dictionary/DictMethodEntryHelper.py new file mode 100644 index 0000000..51c362b --- /dev/null +++ b/rcsb/utils/dictionary/DictMethodEntryHelper.py @@ -0,0 +1,1214 @@ +## +# File: DictMethodEntryHelper.py (DictMethodRunnerHelper.py) +# Author: J. Westbrook +# Date: 18-Aug-2018 +# Version: 0.001 Initial version +# +# +# Updates: +# 4-Sep-2018 jdw add methods to construct entry and entity identifier categories. 
+# 10-Sep-2018 jdw add method for citation author aggregation +# 22-Sep-2018 jdw add method assignAssemblyCandidates() +# 27-Oct-2018 jdw add method consolidateAccessionDetails() +# 30-Oct-2018 jdw add category methods addChemCompRelated(), addChemCompInfo(), +# addChemCompDescriptor() +# 10-Nov-2018 jdw add addChemCompSynonyms(), addChemCompTargets(), filterBlockByMethod() +# 12-Nov-2018 jdw add InChIKey matching in addChemCompRelated() +# 15-Nov-2018 jdw add handling for antibody misrepresentation of multisource organisms +# 28-Nov-2018 jdw relax constraints on the production of rcsb_entry_info +# 1-Dec-2018 jdw add ncbi source and host organism info +# 11-Dec-2018 jdw add addStructRefSeqEntityIds and buildEntityPolySeq +# 10-Jan-2019 jdw better handle initialization in filterBlockByMethod() +# 11-Jan-2019 jdw revise classification in assignAssemblyCandidates() +# 16-Feb-2019 jdw add buildContainerEntityInstanceIds() +# 19-Feb-2019 jdw add internal method __addPdbxValidateAsymIds() to add cardinal identifiers to +# pdbx_validate_* categories +# 28-Feb-2019 jdw change criteria for adding rcsb_chem_comp_container_identifiers to work with ion definitions +# 11-Mar-2019 jdw replace taxonomy file handling with calls to TaxonomyUtils() +# 11-Mar-2019 jdw add EC lineage using EnzymeDatabaseUtils() +# 17-Mar-2019 jdw add support for entity subcategory rcsb_macromolecular_names_combined +# 23-Mar-2019 jdw change criteria chem_comp collection criteria to _chem_comp.pdbx_release_status +# 25-Mar-2019 jdw remap merged taxons and adjust exception handling for taxonomy lineage generation +# 7-Apr-2019 jdw add CathClassificationUtils and sequence difference type counts +# 25-Apr-2019 jdw For source and host organism add ncbi_parent_scientific_name +# add rcsb_entry_info.deposited_modeled_polymer_monomer_count and +# rcsb_entry_info.deposited_unmodeled_polymer_monomer_count, +# 1-May-2019 jdw add support for _rcsb_entry_info.deposited_polymer_monomer_count, +# _rcsb_entry_info.polymer_entity_count_protein, +# _rcsb_entry_info.polymer_entity_count_nucleic_acid, +# _rcsb_entry_info.polymer_entity_count_nucleic_acid_hybrid, +# _rcsb_entry_info.polymer_entity_count_DNA, +# _rcsb_entry_info.polymer_entity_count_RNA, +# _rcsb_entry_info.nonpolymer_ligand_entity_count +# _rcsb_entry_info.selected_polymer_entity_types +# _rcsb_entry_info.polymer_entity_taxonomy_count +# _rcsb_entry_info.assembly_count +# add categories rcsb_entity_instance_domain_scop and rcsb_entity_instance_domain_cath +# 4-May-2019 jdw extend content in categories rcsb_entity_instance_domain_scop and rcsb_entity_instance_domain_cath +# 13-May-2019 jdw add rcsb_entry_info.deposited_polymer_entity_instance_count and deposited_nonpolymer_entity_instance_count +# add entity_poly.rcsb_non_std_monomer_count and rcsb_non_std_monomers +# 15-May-2019 jdw add _rcsb_entry_info.na_polymer_entity_types; update enumerations for _rcsb_entry_info.selected_polymer_entity_types +# 19-May-2019 jdw add method __getStructConfInfo() +# 21-May-2019 jdw handle odd ordering of records in struct_ref_seq_dif. +# 25-Nov-2019 jdw add method normalizeCitationJournalAbbrev() and dependencies +# +## +""" +Helper class implements entry-level method references in the RCSB dictionary extension. + +All data accessors and structures here refer to dictionary category and attribute names. 
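+ + A minimal usage sketch (illustrative only; assumes a configured DictMethodResourceProvider): + + rP = DictMethodResourceProvider(cfgOb) # cfgOb is a hypothetical configuration object + helper = DictMethodEntryHelper(resourceProvider=rP) + ok = helper.buildContainerEntryIds(dataContainer, "rcsb_entry_container_identifiers")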
+ +""" +__docformat__ = "restructuredtext en" +__author__ = "John Westbrook" +__email__ = "jwest@rcsb.rutgers.edu" +__license__ = "Apache 2.0" + +# pylint: disable=too-many-lines + +import logging +from string import capwords + +from mmcif.api.DataCategory import DataCategory + +logger = logging.getLogger(__name__) + + +def cmpElements(lhs, rhs): + return 0 if (lhs[-1].isdigit() or lhs[-1] in ["R", "S"]) and rhs[0].isdigit() else -1 + + +class DictMethodEntryHelper(object): + """Helper class implements entry-level method references in the RCSB dictionary extension.""" + + def __init__(self, **kwargs): + """ + Args: + resourceProvider: (obj) instance of DictMethodResourceProvider() + + """ + # + logger.debug("Dictionary entry method helper init with kwargs %r", kwargs) + self._raiseExceptions = kwargs.get("raiseExceptions", False) + # + rP = kwargs.get("resourceProvider") + self.__commonU = rP.getResource("DictMethodCommonUtils instance") if rP else None + self.__dApi = rP.getResource("Dictionary API instance (pdbx_core)") if rP else None + # + self.__crP = rP.getResource("CitationReferenceProvider instance") if rP else None + self.__jtaP = rP.getResource("JournalTitleAbbreviationProvider instance") if rP else None + # + # logger.debug("Dictionary entry method helper init") + + def echo(self, msg): + logger.info(msg) + + def deferredItemMethod(self, dataContainer, catName, atName, **kwargs): + """Placeholder for an item method.""" + _ = kwargs + logger.debug("Called deferred item method %r %r for %r", catName, atName, dataContainer.getName()) + return True + + def deferredCategoryMethod(self, dataContainer, catName, **kwargs): + """Placeholder for a category method.""" + _ = kwargs + logger.debug("Called deferred category method %r for %r", catName, dataContainer.getName()) + return True + + def setDatablockId(self, dataContainer, catName, atName, **kwargs): + """Item-level method to set the value of the input item to the current container name. + + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + atName (str): Attribute name + + Returns: + bool: True for success or False otherwise + """ + logger.debug("Starting catName %s atName %s kwargs %r", catName, atName, kwargs) + try: + val = dataContainer.getName() + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=[atName])) + # + cObj = dataContainer.getObj(catName) + if not cObj.hasAttribute(atName): + cObj.appendAttribute(atName) + # + rc = cObj.getRowCount() + numRows = rc if rc else 1 + for ii in range(numRows): + cObj.setValue(val, atName, ii) + return True + except Exception as e: + logger.exception("Failing with %s", str(e)) + return False + + def setLoadDateTime(self, dataContainer, catName, atName, **kwargs): + """Set the value of the input data item with container load date. 
+ + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + atName (str): Attribute name + + Returns: + bool: True for success or False otherwise + """ + logger.debug("Starting catName %s atName %s kwargs %r", catName, atName, kwargs) + try: + val = dataContainer.getProp("load_date") + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=[atName])) + # + cObj = dataContainer.getObj(catName) + if not cObj.hasAttribute(atName): + cObj.appendAttribute(atName) + # + rc = cObj.getRowCount() + numRows = rc if rc else 1 + for ii in range(numRows): + cObj.setValue(val, atName, ii) + return True + except Exception as e: + logger.exception("Failing with %s", str(e)) + return False + + def setLocator(self, dataContainer, catName, atName, **kwargs): + """Set the value of the input data item with container locator path. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + atName (str): Attribute name + + Returns: + bool: True for success or False otherwise + """ + logger.debug("Starting catName %s atName %s kwargs %r", catName, atName, kwargs) + try: + val = dataContainer.getProp("locator") + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=[atName])) + # + cObj = dataContainer.getObj(catName) + if not cObj.hasAttribute(atName): + cObj.appendAttribute(atName) + # + rc = cObj.getRowCount() + numRows = rc if rc else 1 + for ii in range(numRows): + cObj.setValue(val, atName, ii) + return True + except Exception as e: + logger.exception("Failing with %s", str(e)) + return False + + def setRowIndex(self, dataContainer, catName, atName, **kwargs): + """Set the values of the input data item with the category row index. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + atName (str): Attribute name + + Returns: + bool: True for success or False otherwise + """ + logger.debug("Starting catName %s atName %s kwargs %r", catName, atName, kwargs) + try: + if not dataContainer.exists(catName): + # exit if there is no category to index + return False + # + cObj = dataContainer.getObj(catName) + if not cObj.hasAttribute(atName): + cObj.appendAttribute(atName) + # + rc = cObj.getRowCount() + numRows = rc if rc else 1 + for ii, iRow in enumerate(range(numRows), 1): + # Note - we set the integer value as a string - + cObj.setValue(str(ii), atName, iRow) + return True + except Exception as e: + logger.exception("Failing with %s", str(e)) + return False + + def aggregateCitationOrcidIdentifiers(self, dataContainer, catName, atName, **kwargs): + """Set the value of the input data item with the list of ORCID identifiers for the citation authors. 
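+ + Values are aggregated from citation_author.identifier_ORCID as a comma-separated list, with "?" recorded when no identifiers are present.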
+ + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + atName (str): Attribute name + + Returns: + bool: True for success or False otherwise + """ + logger.debug("Starting catName %s atName %s kwargs %r", catName, atName, kwargs) + try: + if not dataContainer.exists(catName) or not dataContainer.exists("citation_author"): + return False + # + cObj = dataContainer.getObj(catName) + if not cObj.hasAttribute(atName): + cObj.appendAttribute(atName) + citIdL = cObj.getAttributeValueList("id") + # + tObj = dataContainer.getObj("citation_author") + # + + citIdL = list(set(citIdL)) + tD = {} + for ii, citId in enumerate(citIdL): + if tObj.hasAttribute("identifier_ORCID"): + tD[citId] = tObj.selectValuesWhere("identifier_ORCID", citId, "citation_id") + else: + tD[citId] = [] + for ii in range(cObj.getRowCount()): + citId = cObj.getValue("id", ii) + if tD[citId]: + cObj.setValue(",".join(tD[citId]), atName, ii) + else: + cObj.setValue("?", atName, ii) + return True + except Exception as e: + logger.exception("Failing for %r with %s", dataContainer.getName(), str(e)) + return False + + def aggregateCitationAuthors(self, dataContainer, catName, atName, **kwargs): + """Set the value of the input data item with list of citation authors. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + atName (str): Attribute name + + Returns: + bool: True for success or False otherwise + """ + logger.debug("Starting catName %s atName %s kwargs %r", catName, atName, kwargs) + try: + if not dataContainer.exists(catName) or not dataContainer.exists("citation_author"): + return False + # + cObj = dataContainer.getObj(catName) + if not cObj.hasAttribute(atName): + cObj.appendAttribute(atName) + citIdL = cObj.getAttributeValueList("id") + # + tObj = dataContainer.getObj("citation_author") + # + citIdL = list(set(citIdL)) + tD = {} + for ii, citId in enumerate(citIdL): + tD[citId] = tObj.selectValuesWhere("name", citId, "citation_id") + for ii in range(cObj.getRowCount()): + citId = cObj.getValue("id", ii) + cObj.setValue("|".join(tD[citId]), atName, ii) + return True + except Exception as e: + logger.exception("Failing for %r with %s", dataContainer.getName(), str(e)) + return False + + def normalizeCitationJournalAbbrev(self, dataContainer, catName, atName, **kwargs): + """Normalize the citation journal abbreviation using Medline and CrossRef reference data. 
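+ + Abbreviation resolution and fallback logic is implemented in __updateJournalAbbreviation().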
+ + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + atName (str): Attribute name + + Returns: + bool: True for success or False otherwise + """ + logger.debug("Starting catName %s atName %s kwargs %r", catName, atName, kwargs) + revAbbrev = None + try: + if not dataContainer.exists(catName): + return False + # + cObj = dataContainer.getObj(catName) + if not cObj.hasAttribute("journal_abbrev") or not cObj.hasAttribute("id") or not cObj.hasAttribute("journal_id_ISSN"): + return False + # + if not cObj.hasAttribute(atName): + cObj.appendAttribute(atName) + # + rcsbId = dataContainer.getName() + for ii in range(cObj.getRowCount()): + # citId = cObj.getValue("id", ii) + issn = cObj.getValueOrDefault("journal_id_ISSN", ii, defaultValue=None) + curAbbrev = cObj.getValueOrDefault("journal_abbrev", ii, defaultValue=None) + if curAbbrev: + revAbbrev = self.__updateJournalAbbreviation(rcsbId, issn, curAbbrev) + revAbbrev = revAbbrev if revAbbrev else curAbbrev + # + logger.debug("%s journal abbreviation issn %r current %r normalized %r", rcsbId, issn, curAbbrev, revAbbrev) + cObj.setValue(revAbbrev, atName, ii) + return True + except Exception as e: + logger.exception("Failing for %r with %s", dataContainer.getName(), str(e)) + return False + + def __updateJournalAbbreviation(self, rcsbId, issn, curAbbrev): + revAbbrev = None + try: + if issn: + medlineAbbrev = self.__crP.getMedlineJournalAbbreviation(issn) + # medlineIsoAbbrev = self.__crP.getMedlineJournalIsoAbbreviation(issn) + crIssn = issn.replace("-", "") + crTitle = self.__crP.getCrossRefJournalTitle(crIssn) + # + revAbbrev = medlineAbbrev + if not medlineAbbrev and not crTitle: + logger.debug("%s: missing information for issn %r curAbbrev %r", rcsbId, issn, curAbbrev) + revAbbrev = capwords(curAbbrev.replace(".", " ")) + elif not medlineAbbrev: + revAbbrev = self.__jtaP.getJournalAbbreviation(crTitle, usePunctuation=False) + else: + if curAbbrev.upper() in ["TO BE PUBLISHED", "IN PREPARATION"]: + revAbbrev = "To be published" + elif curAbbrev.upper().startswith("THESIS"): + revAbbrev = "Thesis" + else: + revAbbrev = capwords(curAbbrev.replace(".", " ")) + logger.debug("%r: missing issn and non-standard abbrev for %r", rcsbId, curAbbrev) + + if not curAbbrev: + logger.info("%r: missing issn and journal abbrev", rcsbId) + # + logger.debug("%s: revised: %r current: %r", rcsbId, revAbbrev, curAbbrev) + except Exception as e: + logger.exception("Failing on %r %r %r with %r", rcsbId, issn, curAbbrev, str(e)) + + return revAbbrev + + def assignPrimaryCitation(self, dataContainer, catName, atName, **kwargs): + """Assign the primary citation flag ("Y" for the citation with id PRIMARY, "N" otherwise). 
+ + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + atName (str): Attribute name + + Returns: + bool: True for success or False otherwise + """ + logger.debug("Starting catName %s atName %s kwargs %r", catName, atName, kwargs) + try: + if not dataContainer.exists(catName): + return False + # + cObj = dataContainer.getObj(catName) + if not cObj.hasAttribute(atName): + cObj.appendAttribute(atName) + # + for ii in range(cObj.getRowCount()): + citId = cObj.getValue("id", ii) + if citId.upper() == "PRIMARY": + cObj.setValue("Y", atName, ii) + else: + cObj.setValue("N", atName, ii) + return True + except Exception as e: + logger.exception("Failing for %r with %s", dataContainer.getName(), str(e)) + return False + + def __getEmdbIdentifiers(self, dataContainer): + """Extract EMDB cross-references from the database_2 and pdbx_database_related categories. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + + Returns: + (dict, dict): associated EMDB identifiers and other related EMDB identifiers, each mapped to a content type + + # + loop_ + _database_2.database_id + _database_2.database_code + PDB 6QUY + WWPDB D_1292100913 + EMDB EMD-4644 + # + loop_ + _pdbx_database_related.db_name + _pdbx_database_related.details + _pdbx_database_related.db_id + _pdbx_database_related.content_type + EMDB 'HsCKK (human CAMSAP1) decorated 13pf taxol-GDP microtubule (asymmetric unit)' EMD-4643 'other EM volume' + PDB 'HsCKK (human CAMSAP1) decorated 13pf taxol-GDP microtubule (asymmetric unit)' 6QUS unspecified + EMDB 'NgCKK (N.Gruberi CKK) decorated 13pf taxol-GDP microtubule' EMD-4644 'associated EM volume' + # + """ + emdbIdD = {} + emdbIdAltD = {} + if dataContainer.exists("database_2"): + dbObj = dataContainer.getObj("database_2") + for ii in range(dbObj.getRowCount()): + dbId = dbObj.getValue("database_id", ii) + dbCode = dbObj.getValue("database_code", ii) + if dbId.upper() == "EMDB": + emdbIdD[dbCode] = "associated EM volume" + + if dataContainer.exists("pdbx_database_related"): + drObj = dataContainer.getObj("pdbx_database_related") + for ii in range(drObj.getRowCount()): + dbCode = drObj.getValue("db_id", ii) + dbName = drObj.getValue("db_name", ii) + contentType = drObj.getValue("content_type", ii) + if dbName.upper() == "EMDB" and contentType.upper() == "ASSOCIATED EM VOLUME" and dbCode not in emdbIdD: + emdbIdD[dbCode] = "associated EM volume" + elif dbName.upper() == "EMDB" and contentType.upper() != "ASSOCIATED EM VOLUME" and dbCode not in emdbIdAltD: + emdbIdAltD[dbCode] = contentType + return emdbIdD, emdbIdAltD + + def buildContainerEntryIds(self, dataContainer, catName, **kwargs): + """Load the input category with rcsb_entry_container_identifiers content. + + Args: + dataContainer (object): mmcif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For example: + + loop_ + _rcsb_entry_container_identifiers.entry_id + _rcsb_entry_container_identifiers.entity_ids + _rcsb_entry_container_identifiers.polymer_entity_ids + _rcsb_entry_container_identifiers.non-polymer_entity_ids + _rcsb_entry_container_identifiers.assembly_ids + _rcsb_entry_container_identifiers.rcsb_id + ... 
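+ + Also populated: branched_entity_ids, emdb_ids, related_emdb_ids, and model_ids.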
+ + """ + logger.debug("Starting catName %s kwargs %r", catName, kwargs) + try: + if not dataContainer.exists("entry"): + return False + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # + cObj = dataContainer.getObj(catName) + + tObj = dataContainer.getObj("entry") + entryId = tObj.getValue("id", 0) + cObj.setValue(entryId, "entry_id", 0) + cObj.setValue(entryId, "rcsb_id", 0) + + # + tObj = dataContainer.getObj("entity") + entityIdL = tObj.getAttributeValueList("id") + cObj.setValue(",".join(entityIdL), "entity_ids", 0) + # + # + tIdL = tObj.selectValuesWhere("id", "polymer", "type") + tV = ",".join(tIdL) if tIdL else "?" + cObj.setValue(tV, "polymer_entity_ids", 0) + + tIdL = tObj.selectValuesWhere("id", "non-polymer", "type") + tV = ",".join(tIdL) if tIdL else "?" + cObj.setValue(tV, "non-polymer_entity_ids", 0) + # + tIdL = tObj.selectValuesWhere("id", "branched", "type") + tV = ",".join(tIdL) if tIdL else "?" + cObj.setValue(tV, "branched_entity_ids", 0) + # + # tIdL = tObj.selectValuesWhere("id", "water", "type") + # tV = ",".join(tIdL) if tIdL else "?" + # cObj.setValue(tV, "water_entity_ids", 0) + # + tObj = dataContainer.getObj("pdbx_struct_assembly") + assemblyIdL = tObj.getAttributeValueList("id") if tObj else [] + tV = ",".join(assemblyIdL) if assemblyIdL else "?" + cObj.setValue(tV, "assembly_ids", 0) + # + # + emdbIdD, emdbIdAltD = self.__getEmdbIdentifiers(dataContainer) + tV = ",".join([tId for tId in emdbIdD]) if emdbIdD else "?" + cObj.setValue(tV, "emdb_ids", 0) + tV = ",".join([tId for tId in emdbIdAltD]) if emdbIdAltD else "?" + cObj.setValue(tV, "related_emdb_ids", 0) + # + modelIdList = self.__commonU.getModelIdList(dataContainer) + tV = ",".join([str(tId) for tId in modelIdList]) if modelIdList else "?" + cObj.setValue(tV, "model_ids", 0) + # + return True + except Exception as e: + logger.exception("For %s failing with %s", catName, str(e)) + return False + + def consolidateAccessionDetails(self, dataContainer, catName, **kwargs): + """Consolidate accession details into the rcsb_accession_info category. Also include + a flag for the availability of any supporting experimental data. + + Args: + dataContainer (object): mmif.api.DataContainer object instance + catName (str): Category name + + Returns: + bool: True for success or False otherwise + + For example: + For example - + _rcsb_accession_info.entry_id 1ABC + _rcsb_accession_info.status_code REL + _rcsb_accession_info.deposit_date 2018-01-11 + _rcsb_accession_info.initial_release_date 2018-03-23 + _rcsb_accession_info.major_revision 1 + _rcsb_accession_info.minor_revision 2 + _rcsb_accession_info.revision_date 2018-10-25 + + + Taking data values from: + + _pdbx_database_status.entry_id 3OQP + _pdbx_database_status.deposit_site RCSB + _pdbx_database_status.process_site RCSB + _pdbx_database_status.recvd_initial_deposition_date 2010-09-03 + _pdbx_database_status.status_code REL + _pdbx_database_status.status_code_sf REL + _pdbx_database_status.status_code_mr ? + _pdbx_database_status.status_code_cs ? + _pdbx_database_status.pdb_format_compatible Y + _pdbx_database_status.methods_development_category ? 
+ _pdbx_database_status.SG_entry Y + # + loop_ + _pdbx_audit_revision_history.ordinal + _pdbx_audit_revision_history.data_content_type + _pdbx_audit_revision_history.major_revision + _pdbx_audit_revision_history.minor_revision + _pdbx_audit_revision_history.revision_date + 1 'Structure model' 1 0 2010-10-13 + 2 'Structure model' 1 1 2011-07-13 + 3 'Structure model' 1 2 2011-07-20 + 4 'Structure model' 1 3 2014-11-12 + 5 'Structure model' 1 4 2017-10-25 + # + + # - For EM and SAS - + _pdbx_database_related.db_name EMDB + _pdbx_database_related.details + 'pseudo-atomic model of the RNA polymerase lambda-based antitermination complex solved by cryo-EM' + _pdbx_database_related.db_id EMD-3561 + _pdbx_database_related.content_type 'associated EM volume' + """ + ## + try: + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + # + # If there is incomplete accession information then exit + if not (dataContainer.exists("pdbx_database_status") or dataContainer.exists("pdbx_audit_revision_history")): + return False + # Create the new target category + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + + cObj = dataContainer.getObj(catName) + # + tObj = dataContainer.getObj("pdbx_database_status") + entryId = tObj.getValue("entry_id", 0) + statusCode = tObj.getValue("status_code", 0) + depositDate = tObj.getValue("recvd_initial_deposition_date", 0) + # + cObj.setValue(entryId, "entry_id", 0) + cObj.setValue(statusCode, "status_code", 0) + cObj.setValue(depositDate, "deposit_date", 0) + # cObj.setValue(depositDate[:4], "deposit_year", 0) + # + # -- Experimental data availability -- + # + expDataRelFlag = "N" + statusSf = tObj.getValueOrDefault("status_code_sf", 0, defaultValue=None) + statusMr = tObj.getValueOrDefault("status_code_mr", 0, defaultValue=None) + statusCs = tObj.getValueOrDefault("status_code_cs", 0, defaultValue=None) + # + if statusSf == "REL" or statusMr == "REL" or statusCs == "REL": + expDataRelFlag = "Y" + else: + if dataContainer.exists("pdbx_database_related"): + rObj = dataContainer.getObj("pdbx_database_related") + ctL = rObj.getAttributeValueList("content_type") + if "associated EM volume" in ctL or "associated SAS data" in ctL: + expDataRelFlag = "Y" + # + cObj.setValue(expDataRelFlag, "has_released_experimental_data", 0) + # + tObj = dataContainer.getObj("pdbx_audit_revision_history") + nRows = tObj.getRowCount() + # Assuming the default sorting order from the release module - + releaseDate = tObj.getValue("revision_date", 0) + minorRevision = tObj.getValue("minor_revision", nRows - 1) + majorRevision = tObj.getValue("major_revision", nRows - 1) + revisionDate = tObj.getValue("revision_date", nRows - 1) + cObj.setValue(releaseDate, "initial_release_date", 0) + # cObj.setValue(releaseDate[:4], "initial_release_year", 0) + cObj.setValue(minorRevision, "minor_revision", 0) + cObj.setValue(majorRevision, "major_revision", 0) + cObj.setValue(revisionDate, "revision_date", 0) + # + return True + except Exception as e: + logger.exception("In %s for %s failing with %s", dataContainer.getName(), catName, str(e)) + return False + + def addEntryInfo(self, dataContainer, catName, **kwargs): + """ + Add _rcsb_entry_info, for example: + _rcsb_entry_info.entry_id 1ABC + _rcsb_entry_info.polymer_composition 'heteromeric protein' + _rcsb_entry_info.experimental_method 'multiple methods' + _rcsb_entry_info.experimental_method_count 2 + 
_rcsb_entry_info.polymer_entity_count 2 + _rcsb_entry_info.entity_count 2 + _rcsb_entry_info.nonpolymer_entity_count 2 + _rcsb_entry_info.branched_entity_count 0 + _rcsb_entry_info.software_programs_combined 'Phenix;RefMac' + .... + + Also add the related field: + + _entity_poly.rcsb_entity_polymer_type + + 'Protein' 'polypeptide(D) or polypeptide(L)' + 'DNA' 'polydeoxyribonucleotide' + 'RNA' 'polyribonucleotide' + 'NA-hybrid' 'polydeoxyribonucleotide/polyribonucleotide hybrid' + 'Other' 'polysaccharide(D), polysaccharide(L), cyclic-pseudo-peptide, peptide nucleic acid, or other' + # + _rcsb_entry_info.deposited_polymer_monomer_count + 'polymer_entity_count_protein', + 'polymer_entity_count_nucleic_acid', + 'polymer_entity_count_nucleic_acid_hybrid', + 'polymer_entity_count_DNA', + 'polymer_entity_count_RNA', + + """ + try: + logger.debug("Starting with %r %r %r", dataContainer.getName(), catName, kwargs) + # Exit if source categories are missing + if not (dataContainer.exists("exptl") and dataContainer.exists("entity")): + return False + # + # Create the new target category rcsb_entry_info + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # -------------------------------------------------------------------------------------------------------- + # catName = rcsb_entry_info + cObj = dataContainer.getObj(catName) + # + # -------------------------------------------------------------------------------------------------------- + # Filter experimental methods + # + xObj = dataContainer.getObj("exptl") + entryId = xObj.getValue("entry_id", 0) + methodL = xObj.getAttributeValueList("method") + methodCount, expMethod = self.__commonU.filterExperimentalMethod(methodL) + cObj.setValue(entryId, "entry_id", 0) + cObj.setValue(expMethod, "experimental_method", 0) + cObj.setValue(methodCount, "experimental_method_count", 0) + # + # -------------------------------------------------------------------------------------------------------- + # Experimental resolution - + # + resL = self.__filterExperimentalResolution(dataContainer) + if resL: + cObj.setValue(",".join(resL), "resolution_combined", 0) + # + # --------------------------------------------------------------------------------------------------------- + # Consolidate software details - + # + swNameL = [] + if dataContainer.exists("software"): + swObj = dataContainer.getObj("software") + swNameL.extend(swObj.getAttributeUniqueValueList("name")) + if dataContainer.exists("pdbx_nmr_software"): + swObj = dataContainer.getObj("pdbx_nmr_software") + swNameL.extend(swObj.getAttributeUniqueValueList("name")) + if dataContainer.exists("em_software"): + swObj = dataContainer.getObj("em_software") + swNameL.extend(swObj.getAttributeUniqueValueList("name")) + if swNameL: + swNameD = {swName.upper().strip(): True for swName in swNameL if swName not in [".", "?"]} + swNameL = sorted(swNameD.keys()) + cObj.setValue(";".join(swNameL), "software_programs_combined", 0) + # --------------------------------------------------------------------------------------------------------- + # ENTITY FEATURES + # + # entity and polymer entity counts - + ## + eObj = dataContainer.getObj("entity") + eTypeL = eObj.getAttributeValueList("type") + # + numPolymers = 0 + numNonPolymers = 0 + numBranched = 0 + numSolvent = 0 + for eType in eTypeL: + if eType == "polymer": + numPolymers += 1 + elif eType == "non-polymer": + numNonPolymers += 1 + elif eType == "branched": + numBranched += 1 
+ elif eType == "water": + numSolvent += 1 + else: + logger.error("Unexpected entity type for %s %s", dataContainer.getName(), eType) + totalEntities = numPolymers + numNonPolymers + numBranched + numSolvent + # + # Simplified entity polymer type: 'Protein', 'DNA', 'RNA', 'NA-hybrid', or 'Other' + pTypeL = [] + if dataContainer.exists("entity_poly"): + epObj = dataContainer.getObj("entity_poly") + pTypeL = epObj.getAttributeValueList("type") + # + atName = "rcsb_entity_polymer_type" + if not epObj.hasAttribute(atName): + epObj.appendAttribute(atName) + for ii in range(epObj.getRowCount()): + epObj.setValue(self.__commonU.filterEntityPolyType(pTypeL[ii]), atName, ii) + # + # Add any branched entity types to the type list - + if dataContainer.exists("pdbx_entity_branch"): + ebObj = dataContainer.getObj("pdbx_entity_branch") + pTypeL.extend(ebObj.getAttributeValueList("type")) + # + polymerCompClass, ptClass, naClass, eptD = self.__commonU.getPolymerComposition(pTypeL) + if eptD and len(eptD) > 2: + logger.debug("%s entity type count=%d class=%s typeD %r", dataContainer.getName(), len(eptD), polymerCompClass, eptD) + # + cObj.setValue(polymerCompClass, "polymer_composition", 0) + cObj.setValue(ptClass, "selected_polymer_entity_types", 0) + cObj.setValue(naClass, "na_polymer_entity_types", 0) + cObj.setValue(numPolymers, "polymer_entity_count", 0) + cObj.setValue(numNonPolymers, "nonpolymer_entity_count", 0) + cObj.setValue(numBranched, "branched_entity_count", 0) + cObj.setValue(numSolvent, "solvent_entity_count", 0) + cObj.setValue(totalEntities, "entity_count", 0) + # + num = eptD["protein"] if "protein" in eptD else 0 + cObj.setValue(num, "polymer_entity_count_protein", 0) + # + num = eptD["NA-hybrid"] if "NA-hybrid" in eptD else 0 + cObj.setValue(num, "polymer_entity_count_nucleic_acid_hybrid", 0) + # + numDNA = eptD["DNA"] if "DNA" in eptD else 0 + cObj.setValue(numDNA, "polymer_entity_count_DNA", 0) + # + numRNA = eptD["RNA"] if "RNA" in eptD else 0 + cObj.setValue(numRNA, "polymer_entity_count_RNA", 0) + cObj.setValue(numDNA + numRNA, "polymer_entity_count_nucleic_acid", 0) + # + # --------------------------------------------------------------------------------------------------------- + # INSTANCE FEATURES + # + ## + repModelL = ["1"] + if self.__commonU.hasMethodNMR(methodL): + repModelL = self.__getRepresentativeModels(dataContainer) + logger.debug("Representative model list %r", repModelL) + # + instanceTypeCountD = self.__commonU.getInstanceTypeCounts(dataContainer) + cObj.setValue(instanceTypeCountD["polymer"], "deposited_polymer_entity_instance_count", 0) + cObj.setValue(instanceTypeCountD["non-polymer"], "deposited_nonpolymer_entity_instance_count", 0) + + # + # Various atom counts - + # + repModelId = repModelL[0] + numHeavyAtomsModel, numHydrogenAtomsModel, numAtomsTotal, numModelsTotal = self.__commonU.getDepositedAtomCounts(dataContainer, modelId=repModelId) + # + logger.debug("numAtomsTotal %d numHeavyAtomsModel %d numModelsTotal %d", numAtomsTotal, numHeavyAtomsModel, numModelsTotal) + logger.debug("entity type atom counts %r", self.__commonU.getEntityTypeHeavyAtomCounts(dataContainer, modelId=repModelId)) + logger.debug("instance atom counts %r", self.__commonU.getEntityTypeHeavyAtomCounts(dataContainer, modelId=repModelId)) + # + + if numHeavyAtomsModel > 0: + cObj.setValue(numHeavyAtomsModel, "deposited_atom_count", 0) + cObj.setValue(numModelsTotal, "deposited_model_count", 0) + cObj.setValue(numHydrogenAtomsModel, "deposited_hydrogen_atom_count", 0) + tCD = 
self.__commonU.getEntityTypeHeavyAtomCounts(dataContainer, modelId=repModelId) + wCount = tCD["water"] if tCD and "water" in tCD else 0 + cObj.setValue(wCount, "deposited_solvent_atom_count", 0) + # + # --------------------------------------------------------------------------------------------------------- + # Deposited monomer/residue instance counts + # + # Get modeled and unmodeled residue counts + # + modeledCount, unModeledCount = self.__commonU.getDepositedMonomerCounts(dataContainer, modelId=repModelId) + cObj.setValue(modeledCount, "deposited_modeled_polymer_monomer_count", 0) + cObj.setValue(unModeledCount, "deposited_unmodeled_polymer_monomer_count", 0) + cObj.setValue(modeledCount + unModeledCount, "deposited_polymer_monomer_count", 0) + # + # --------------------------------------------------------------------------------------------------------- + # Counts of intermolecular bonds/linkages + # + # + bCountsD = self.__commonU.getInstanceConnectionCounts(dataContainer) + cObj.setValue(bCountsD["disulf"], "disulfide_bond_count", 0) + cObj.setValue(bCountsD["metalc"], "inter_mol_metalic_bond_count", 0) + cObj.setValue(bCountsD["covale"], "inter_mol_covalent_bond_count", 0) + # + cisPeptideD = self.__commonU.getCisPeptides(dataContainer) + cObj.setValue(len(cisPeptideD), "cis_peptide_count", 0) + # + # This is reset in anothor method - filterSourceOrganismDetails() + cObj.setValue(None, "polymer_entity_taxonomy_count", 0) + # + fw = self.__commonU.getFormulaWeightNonSolvent(dataContainer) + cObj.setValue(str(round(fw, 2)), "molecular_weight", 0) + # + # nonpolymer_bound_components + # + bcL = self.__commonU.getBoundNonpolymersComponentIds(dataContainer) + if bcL: + cObj.setValue(";".join(bcL), "nonpolymer_bound_components", 0) + # + # polymer_molecular_weight_minimum + # polymer_molecular_weight_maximum + # nonpolymer_molecular_weight_minimum + # nonpolymer_molecular_weight_maximum + # branched_molecular_weight_minimum + # branched_molecular_weight_maximum + # + fwBoundD = self.__commonU.getEntityFormulaWeightBounds(dataContainer) + if "polymer" in fwBoundD and fwBoundD["polymer"]["min"] and fwBoundD["polymer"]["max"]: + cObj.setValue(str(round(fwBoundD["polymer"]["min"], 2)), "polymer_molecular_weight_minimum", 0) + cObj.setValue(str(round(fwBoundD["polymer"]["max"], 2)), "polymer_molecular_weight_maximum", 0) + if "non-polymer" in fwBoundD and fwBoundD["non-polymer"]["min"] and fwBoundD["non-polymer"]["max"]: + cObj.setValue(str(round(fwBoundD["non-polymer"]["min"], 2)), "nonpolymer_molecular_weight_minimum", 0) + cObj.setValue(str(round(fwBoundD["non-polymer"]["max"], 2)), "nonpolymer_molecular_weight_maximum", 0) + if "branched" in fwBoundD and fwBoundD["branched"]["min"] and fwBoundD["branched"]["max"]: + cObj.setValue(str(round(fwBoundD["branched"]["min"], 2)), "branched_molecular_weight_minimum", 0) + cObj.setValue(str(round(fwBoundD["branched"]["max"], 2)), "branched_molecular_weight_maximum", 0) + # + # polymer_monomer_count_maximum + # polymer_monomer_count_minimum + # + polymerLengthBounds = self.__commonU.getEntityPolymerLengthBounds(dataContainer) + if polymerLengthBounds: + cObj.setValue(str(polymerLengthBounds[0]), "polymer_monomer_count_minimum", 0) + cObj.setValue(str(polymerLengthBounds[1]), "polymer_monomer_count_maximum", 0) + # + # --------------------------------------------------------------------------------------------------------- + # Consolidate diffraction wavelength details - + wL = [] + try: + if dataContainer.exists("diffrn_radiation_wavelength"): + 
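+            # Wavelength values may be recorded in diffrn_radiation_wavelength, diffrn_radiation,
+            # or diffrn_source; pdbx_wavelength_list entries may be comma-separated strings.
+            #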
+            wL = []
+            try:
+                if dataContainer.exists("diffrn_radiation_wavelength"):
+                    swObj = dataContainer.getObj("diffrn_radiation_wavelength")
+                    wL.extend(swObj.getAttributeUniqueValueList("wavelength"))
+                if dataContainer.exists("diffrn_radiation"):
+                    swObj = dataContainer.getObj("diffrn_radiation")
+                    if swObj.hasAttribute("pdbx_wavelength"):
+                        wL.extend(swObj.getAttributeUniqueValueList("pdbx_wavelength"))
+                    if swObj.hasAttribute("pdbx_wavelength_list"):
+                        tL = []
+                        for tS in swObj.getAttributeUniqueValueList("pdbx_wavelength_list"):
+                            tL.extend(tS.split(","))
+                        if tL:
+                            wL.extend(tL)
+                if dataContainer.exists("diffrn_source"):
+                    swObj = dataContainer.getObj("diffrn_source")
+                    if swObj.hasAttribute("pdbx_wavelength"):
+                        wL.extend(swObj.getAttributeUniqueValueList("pdbx_wavelength"))
+                    if swObj.hasAttribute("pdbx_wavelength_list"):
+                        tL = []
+                        for tS in swObj.getAttributeUniqueValueList("pdbx_wavelength_list"):
+                            tL.extend(tS.split(","))
+                        if tL:
+                            wL.extend(tL)
+                fL = []
+                for wS in wL:
+                    try:
+                        fL.append(float(wS))
+                    except Exception:
+                        pass
+                if fL:
+                    cObj.setValue("%.4f" % min(fL), "diffrn_radiation_wavelength_minimum", 0)
+                    cObj.setValue("%.4f" % max(fL), "diffrn_radiation_wavelength_maximum", 0)
+
+            except Exception as e:
+                logger.exception("%s failing wavelength processing with %s", entryId, str(e))
+            #
+            # JDW
+            self.__updateReflnsResolution(dataContainer)
+            return True
+        except Exception as e:
+            logger.exception("For %s %r failing with %s", dataContainer.getName(), catName, str(e))
+        #
+        return False
+
+    def filterBlockByMethod(self, dataContainer, blockName, **kwargs):
+        """Filter empty placeholder data categories by experimental method."""
+        logger.debug("Starting with %r blockName %r kwargs %r", dataContainer.getName(), blockName, kwargs)
+        try:
+            if not dataContainer.exists("exptl"):
+                return False
+            #
+            xObj = dataContainer.getObj("exptl")
+            methodL = xObj.getAttributeValueList("method")
+            objNameL = []
+            # Test for a diffraction method in the case of multiple methods
+            if len(methodL) > 1:
+                isXtal = False
+                for method in methodL:
+                    if method in ["X-RAY DIFFRACTION", "FIBER DIFFRACTION", "POWDER DIFFRACTION", "ELECTRON CRYSTALLOGRAPHY", "NEUTRON DIFFRACTION", "ELECTRON DIFFRACTION"]:
+                        isXtal = True
+                        break
+                if not isXtal:
+                    objNameL = ["cell", "symmetry", "refine", "refine_hist", "software", "diffrn", "diffrn_radiation"]
+            else:
+                #
+                mS = methodL[0].upper()
+                if mS in ["X-RAY DIFFRACTION", "FIBER DIFFRACTION", "POWDER DIFFRACTION", "ELECTRON CRYSTALLOGRAPHY", "NEUTRON DIFFRACTION", "ELECTRON DIFFRACTION"]:
+                    objNameL = []
+                elif mS in ["SOLUTION NMR", "SOLID-STATE NMR"]:
+                    objNameL = ["cell", "symmetry", "refine", "refine_hist", "software", "diffrn", "diffrn_radiation"]
+                elif mS in ["ELECTRON MICROSCOPY", "CRYO-ELECTRON MICROSCOPY"]:
+                    objNameL = ["cell", "symmetry", "refine", "refine_hist", "software", "diffrn", "diffrn_radiation"]
+                elif mS in ["SOLUTION SCATTERING", "EPR", "THEORETICAL MODEL", "INFRARED SPECTROSCOPY", "FLUORESCENCE TRANSFER"]:
+                    objNameL = ["cell", "symmetry", "refine", "refine_hist", "software", "diffrn", "diffrn_radiation"]
+                else:
+                    logger.error("%s Unexpected method %r", dataContainer.getName(), mS)
+            #
+            for objName in objNameL:
+                dataContainer.remove(objName)
+            return True
+        except Exception as e:
+            logger.exception("For %s failing with %s", dataContainer.getName(), str(e))
+        return False
+
+    def filterEnumerations(self, dataContainer, catName, atName, **kwargs):
+        """Standardize the item value to conform to enumeration specifications."""
+        logger.debug("Starting with %r %r %r %r", dataContainer.getName(), atName, catName, kwargs)
+        subD = {("pdbx_reference_molecule", "class"): [("Anti-tumor", "Antitumor")]}
+        try:
+            if not dataContainer.exists(catName):
+                return False
+            #
+            cObj = dataContainer.getObj(catName)
+            if not cObj.hasAttribute(atName):
+                return False
+            #
+            subL = subD[(catName, atName)] if (catName, atName) in subD else []
+            #
+            for ii in range(cObj.getRowCount()):
+                tV = cObj.getValue(atName, ii)
+                if tV and tV not in [".", "?"]:
+                    for sub in subL:
+                        if sub[0] in tV:
+                            tV = tV.replace(sub[0], sub[1])
+                            cObj.setValue(tV, atName, ii)
+            return True
+        except Exception as e:
+            logger.exception("%s %s %s failing with %s", dataContainer.getName(), catName, atName, str(e))
+        return False
+
+    def __getRepresentativeModels(self, dataContainer):
+        """Return the list of representative models.
+
+        Example:
+            #
+            _pdbx_nmr_ensemble.entry_id                                    5TM0
+            _pdbx_nmr_ensemble.conformers_calculated_total_number          15
+            _pdbx_nmr_ensemble.conformers_submitted_total_number           15
+            _pdbx_nmr_ensemble.conformer_selection_criteria                'all calculated structures submitted'
+            _pdbx_nmr_ensemble.representative_conformer                    ?
+            _pdbx_nmr_ensemble.average_constraints_per_residue             ?
+            _pdbx_nmr_ensemble.average_constraint_violations_per_residue   ?
+            _pdbx_nmr_ensemble.maximum_distance_constraint_violation       ?
+            _pdbx_nmr_ensemble.average_distance_constraint_violation       ?
+            _pdbx_nmr_ensemble.maximum_upper_distance_constraint_violation ?
+            _pdbx_nmr_ensemble.maximum_lower_distance_constraint_violation ?
+            _pdbx_nmr_ensemble.distance_constraint_violation_method        ?
+            _pdbx_nmr_ensemble.maximum_torsion_angle_constraint_violation  ?
+            _pdbx_nmr_ensemble.average_torsion_angle_constraint_violation  ?
+            _pdbx_nmr_ensemble.torsion_angle_constraint_violation_method   ?
+            #
+            _pdbx_nmr_representative.entry_id            5TM0
+            _pdbx_nmr_representative.conformer_id        1
+            _pdbx_nmr_representative.selection_criteria  'fewest violations'
+        """
+        repModelL = []
+        if dataContainer.exists("pdbx_nmr_representative"):
+            tObj = dataContainer.getObj("pdbx_nmr_representative")
+            if tObj.hasAttribute("conformer_id"):
+                for ii in range(tObj.getRowCount()):
+                    nn = tObj.getValue("conformer_id", ii)
+                    if nn is not None and nn.isdigit():
+                        repModelL.append(nn)
+
+        if dataContainer.exists("pdbx_nmr_ensemble"):
+            tObj = dataContainer.getObj("pdbx_nmr_ensemble")
+            if tObj.hasAttribute("representative_conformer"):
+                nn = tObj.getValue("representative_conformer", 0)
+                if nn is not None and nn and nn.isdigit():
+                    repModelL.append(nn)
+        #
+        repModelL = list(set(repModelL))
+        if not repModelL:
+            logger.debug("Missing representative model data for %s using 1", dataContainer.getName())
+            repModelL = ["1"]
+
+        return repModelL
+
+    def __filterExperimentalResolution(self, dataContainer):
+        """Collect resolution estimates from method specific sources."""
+        rL = []
+        if dataContainer.exists("refine"):
+            tObj = dataContainer.getObj("refine")
+            if tObj.hasAttribute("ls_d_res_high"):
+                for ii in range(tObj.getRowCount()):
+                    rv = tObj.getValue("ls_d_res_high", ii)
+                    if self.__commonU.isFloat(rv):
+                        rL.append(rv)
+
+        if dataContainer.exists("em_3d_reconstruction"):
+            tObj = dataContainer.getObj("em_3d_reconstruction")
+            if tObj.hasAttribute("resolution"):
+                for ii in range(tObj.getRowCount()):
+                    rv = tObj.getValue("resolution", ii)
+                    if self.__commonU.isFloat(rv):
+                        rL.append(rv)
+        return rL
+
+    def addCategoryPrimaryCitation(self, dataContainer, blockName, **kwargs):
+        """Add the rcsb_primary_citation category as a copy of the citation category
+        with rcsb extensions.
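+
+        Only the citation row with id 'primary' is copied, and only for the
+        attributes defined for rcsb_primary_citation in the current dictionary.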
+ """ + catName = None + try: + logger.debug("Starting with %r %r %r", dataContainer.getName(), blockName, kwargs) + # Exit if source categories are missing + if not dataContainer.exists("citation"): + return False + cObj = dataContainer.getObj("citation") + catName = "rcsb_primary_citation" + # + if not dataContainer.exists(catName): + dataContainer.append(DataCategory(catName, attributeNameList=self.__dApi.getAttributeNameList(catName))) + # -------------------------------------------------------------------------------------------------------- + rObj = dataContainer.getObj(catName) + atNameList = self.__dApi.getAttributeNameList(catName) + logger.debug("Category %s dict attributes %r", catName, atNameList) + # + for ii in range(cObj.getRowCount()): + pv = cObj.getValue("id", ii) + if pv.upper() == "PRIMARY": + for atName in atNameList: + if cObj.hasAttribute(atName): + rObj.setValue(cObj.getValue(atName, ii), atName, 0) + + return True + except Exception as e: + logger.exception("%s %s failing with %s", dataContainer.getName(), catName, str(e)) + return False + + def __updateReflnsResolution(self, dataContainer): + """Find a plausable data collection diffraction high resolution limit from one of the following sources. + # + _rcsb_entry_info.diffrn_resolution_high_value + _rcsb_entry_info.diffrn_resolution_high_provenance_source + + Update category 'reflns' with any missing resolution extrema data using limits in category reflns_shell. + + _reflns.entry_id 2DCG + _reflns.d_resolution_high 0.900 + _reflns.pdbx_diffrn_id 1 + _reflns.pdbx_ordinal 1 + + + _refine.entry_id 2DCG + _refine.ls_number_reflns_obs 15000 + _refine.ls_number_reflns_all ? + _refine.pdbx_ls_sigma_I 2.000 + _refine.ls_d_res_low ? + _refine.ls_d_res_high 0.900 + _refine.pdbx_refine_id 'X-RAY DIFFRACTION' + _refine.pdbx_diffrn_id 1 + + _reflns_shell.d_res_high 1.18 + _reflns_shell.d_res_low 1.25 + _reflns_shell.pdbx_ordinal 1 + _reflns_shell.pdbx_diffrn_id 1 + # + + """ + try: + logger.debug("Starting with %r", dataContainer.getName()) + # + if not dataContainer.exists("exptl") or not dataContainer.exists("rcsb_entry_info"): + return False + # -------------------------------------------------------------------------------------------------------- + # Only applicable to X-ray + # + xObj = dataContainer.getObj("exptl") + methodL = xObj.getAttributeValueList("method") + _, expMethod = self.__commonU.filterExperimentalMethod(methodL) + if expMethod not in ["X-ray", "Neutron", "Multiple methods"]: + return False + # + resValue = resProvSource = None + # + # Here are the various cases - + if dataContainer.exists("reflns"): + rObj = dataContainer.getObj("reflns") + if rObj.hasAttribute("d_resolution_high"): + rvL = rObj.getAttributeValueList("d_resolution_high") + fvL = [float(rv) for rv in rvL if self.__commonU.isFloat(rv)] + if fvL: + resValue = round(min(fvL), 2) + resProvSource = "Depositor assigned" + + if not resValue and dataContainer.exists("reflns_shell"): + rObj = dataContainer.getObj("reflns_shell") + if rObj.hasAttribute("d_res_high"): + rvL = rObj.getAttributeValueList("d_res_high") + fvL = [float(rv) for rv in rvL if self.__commonU.isFloat(rv)] + if fvL: + resValue = round(min(fvL), 2) + resProvSource = "From the high resolution shell" + + if not resValue and dataContainer.exists("refine"): + + rObj = dataContainer.getObj("refine") + if rObj.hasAttribute("ls_d_res_high"): + fvL = [] + for ii in range(rObj.getRowCount()): + rId = rObj.getValue("pdbx_refine_id", ii) + if rId in ["X-RAY DIFFRACTION", "NEUTRON 
DIFFRACTION", "FIBER DIFFRACTION"]: + rv = rObj.getValue("ls_d_res_high", ii) + if self.__commonU.isFloat(rv): + fvL.append(float(rv)) + if fvL: + resValue = round(min(fvL), 2) + resProvSource = "From refinement resolution cutoff" + # + if not resValue: + logger.debug("No source of data collection resolution available for %r", dataContainer.getName()) + else: + logger.debug("Data collection diffraction limit %r PS %r", resValue, resProvSource) + + if resValue: + eObj = dataContainer.getObj("rcsb_entry_info") + for atName in ["diffrn_resolution_high_value", "diffrn_resolution_high_provenance_source"]: + if not eObj.hasAttribute(atName): + eObj.appendAttribute(atName) + eObj.setValue(resValue, "diffrn_resolution_high_value", 0) + eObj.setValue(resProvSource, "diffrn_resolution_high_provenance_source", 0) + # -------------------------------------------------------------------------------------------------------- + return True + except Exception as e: + logger.exception("%s failing with %s", dataContainer.getName(), str(e)) + return False diff --git a/rcsb/utils/dictionary/DictMethodResourceProvider.py b/rcsb/utils/dictionary/DictMethodResourceProvider.py new file mode 100644 index 0000000..ec4cf74 --- /dev/null +++ b/rcsb/utils/dictionary/DictMethodResourceProvider.py @@ -0,0 +1,393 @@ +## +# File: DictMethodResourceProvider.py +# Author: J. Westbrook +# Date: 3-Jun-2019 +# Version: 0.001 Initial version +# +# +# Updates: +# 17-Jul-2019 jdw add resource for common utilities and dictionary api +# 7-Aug-2019 jdw use dictionary locator map +# 13-Aug-2019 jdw return class instances in all cases. Add cache management support. +# 9-Sep-2019 jdw add AtcProvider() and SiftsSummaryProvider() +# 25-Nov-2019 jdw add CitationReferenceProvider(), ChemCompProvider() and JournalTitleAbbreviationProvider()'s +# 16-Feb-2020 jdw add support for configuration of development resources +# 19-Mar-2020 jdw add ResidProvider() and send cachePath directly to all modules in rcsb.utils.chemref. +# 29-Jul-2020 jdw add PubChemProvider() from rcsb.utils.chemref. +# 30-Jul-2020 jdw add PharosProvider() from rcsb.utils.chemref. +# 29-Oct-2020 jdw add method getReferenceSequenceAlignmentOpt() +## +## +""" +Resource provider for DictMethodHelper tools. 
+ +""" +__docformat__ = "restructuredtext en" +__author__ = "John Westbrook" +__email__ = "jwest@rcsb.rutgers.edu" +__license__ = "Apache 2.0" + +import logging +import os +import platform +import resource +import time + +from rcsb.utils.dictionary.DictionaryApiProviderWrapper import DictionaryApiProviderWrapper +from rcsb.utils.dictionary.DictMethodCommonUtils import DictMethodCommonUtils +from rcsb.utils.chemref.AtcProvider import AtcProvider +from rcsb.utils.chemref.ChemCompModelProvider import ChemCompModelProvider +from rcsb.utils.chemref.ChemCompProvider import ChemCompProvider +from rcsb.utils.chemref.DrugBankProvider import DrugBankProvider +from rcsb.utils.chemref.PsiModProvider import PsiModProvider +from rcsb.utils.chemref.PharosProvider import PharosProvider +from rcsb.utils.chemref.PubChemProvider import PubChemProvider +from rcsb.utils.chemref.RcsbLigandScoreProvider import RcsbLigandScoreProvider +from rcsb.utils.chemref.ResidProvider import ResidProvider +from rcsb.utils.citation.CitationReferenceProvider import CitationReferenceProvider +from rcsb.utils.citation.JournalTitleAbbreviationProvider import JournalTitleAbbreviationProvider +from rcsb.utils.ec.EnzymeDatabaseProvider import EnzymeDatabaseProvider +from rcsb.utils.io.SingletonClass import SingletonClass +from rcsb.utils.seq.SiftsSummaryProvider import SiftsSummaryProvider +from rcsb.utils.struct.CathClassificationProvider import CathClassificationProvider +from rcsb.utils.struct.ScopClassificationProvider import ScopClassificationProvider +from rcsb.utils.taxonomy.TaxonomyProvider import TaxonomyProvider +from rcsb.utils.validation.ValidationReportProvider import ValidationReportProvider + +logger = logging.getLogger(__name__) + + +class DictMethodResourceProvider(SingletonClass): + """Resource provider for DictMethodHelper tools.""" + + def __init__(self, cfgOb, **kwargs): + """Resource provider for dictionary method runner. 
+
+        Arguments:
+            cfgOb {object} -- instance of ConfigUtils class
+
+        Keyword arguments:
+            configName {string} -- configuration section name (default: default section name)
+            cachePath {str} -- path used for temporary file management (default: '.')
+
+        """
+        self.__cfgOb = cfgOb
+
+        self.__configName = kwargs.get("configName", self.__cfgOb.getDefaultSectionName())
+        self.__cachePath = kwargs.get("cachePath", ".")
+        #
+        self.__taxU = None
+        self.__ecU = None
+        self.__scopU = None
+        self.__cathU = None
+        self.__dbU = None
+        self.__residU = None
+        self.__psimodU = None
+        self.__ccU = None
+        self.__ccmU = None
+        self.__commonU = None
+        self.__dApiW = None
+        self.__atcP = None
+        # self.__siftsAbbreviated = kwargs.get("siftsAbbreviated", "PROD")
+        self.__siftsAbbreviated = kwargs.get("siftsAbbreviated", "TEST")
+        self.__ssP = None
+        self.__vrptP = None
+        self.__crP = None
+        self.__jtaP = None
+        self.__pcP = None
+        self.__phP = None
+        self.__rlsP = None
+        #
+        # self.__wsPattern = re.compile(r"\s+", flags=re.UNICODE | re.MULTILINE)
+        # self.__re_non_digit = re.compile(r"[^\d]+")
+        #
+        self.__resourcesD = {
+            "SiftsSummaryProvider instance": self.__fetchSiftsSummaryProvider,
+            "Dictionary API instance (pdbx_core)": self.__fetchDictionaryApi,
+            "TaxonomyProvider instance": self.__fetchTaxonomyProvider,
+            "ScopProvider instance": self.__fetchScopProvider,
+            "CathProvider instance": self.__fetchCathProvider,
+            "EnzymeProvider instance": self.__fetchEnzymeProvider,
+            "DrugBankProvider instance": self.__fetchDrugBankProvider,
+            "ResidProvider instance": self.__fetchResidProvider,
+            "PsiModProvider instance": self.__fetchPsiModProvider,
+            "ChemCompModelProvider instance": self.__fetchChemCompModelProvider,
+            "ChemCompProvider instance": self.__fetchChemCompProvider,
+            "AtcProvider instance": self.__fetchAtcProvider,
+            "DictMethodCommonUtils instance": self.__fetchCommonUtils,
+            "ValidationProvider instance": self.__fetchValidationProvider,
+            "CitationReferenceProvider instance": self.__fetchCitationReferenceProvider,
+            "JournalTitleAbbreviationProvider instance": self.__fetchJournalTitleAbbreviationProvider,
+            "PubChemProvider instance": self.__fetchPubChemProvider,
+            "PharosProvider instance": self.__fetchPharosProvider,
+            "RcsbLigandScoreProvider instance": self.__fetchRcsbLigandScoreProvider,
+        }
+        logger.debug("Dictionary resource provider init completed")
+        #
+
+    def echo(self, msg):
+        logger.info(msg)
+
+    def getReferenceSequenceAlignmentOpt(self):
+        return self.__cfgOb.get("REFERENCE_SEQUENCE_ALIGNMETS", sectionName=self.__configName, default="SIFTS")
+
+    def getResource(self, resourceName, default=None, useCache=True, **kwargs):
+        """Return the named input resource or the default value.
+
+        Arguments:
+            resourceName {str} -- resource name
+            useCache (bool, optional): use the current cache. Defaults to True.
+
+        Keyword Arguments:
+            default {obj} -- default return value for missing resources (default: {None})
+
+        Returns:
+            [obj] -- resource object
+        """
+        logger.debug("Requesting resource %r", resourceName)
+        if resourceName in self.__resourcesD:
+            return self.__resourcesD[resourceName](self.__cfgOb, self.__configName, self.__cachePath, useCache=useCache, **kwargs)
+        else:
+            logger.error("Request for unsupported resource %r returning %r", resourceName, default)
+        #
+        return default
+
+    def cacheResources(self, useCache=False, **kwargs):
+        """Update and optionally clear all resource caches.
+
+        Args:
+            useCache (bool, optional): use the current cache. Defaults to False.
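+
+            Note: with useCache=False each resource cache is rebuilt from its
+            source (logged as REBUILDING rather than CHECKING).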
+
+        Returns:
+            bool: True for success or False otherwise
+        """
+        ret = True
+        tName = "CHECKING" if useCache else "REBUILDING"
+        logger.info("Begin %s cache for %d resources", tName, len(self.__resourcesD))
+        #
+        for resourceName in self.__resourcesD:
+            startTime = time.time()
+            logger.debug("Caching resources for %r", resourceName)
+            tU = self.__resourcesD[resourceName](self.__cfgOb, self.__configName, self.__cachePath, useCache=useCache, **kwargs)
+            ok = tU.testCache()
+            if not ok:
+                logger.error("%s %s fails", tName, resourceName)
+            ret = ret and ok
+            if not ret:
+                logger.info("%s resource %r step status %r cumulative status %r", tName, resourceName, ok, ret)
+            self.__resourceUsageReport(startTime)
+        #
+        logger.info("Completed %s %d resources with status %r", tName, len(self.__resourcesD), ret)
+        return ret
+
+    def __resourceUsageReport(self, startTime):
+        unitS = "MB" if platform.system() == "Darwin" else "GB"
+        rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+        # logger.info("Maximum total resident memory size %.3f %s", rusageMax / 10 ** 6, unitS)
+        endTime = time.time()
+        logger.info(
+            "Step completed at %s (%.4f secs/%.3f %s)",
+            time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
+            endTime - startTime,
+            rusageMax / 10 ** 6,
+            unitS,
+        )
+
+    def __fetchCitationReferenceProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__crP:
+            cachePath = os.path.join(cachePath, cfgOb.get("CITATION_REFERENCE_CACHE_DIR", sectionName=configName))
+            self.__crP = CitationReferenceProvider(cachePath=cachePath, useCache=useCache, **kwargs)
+        return self.__crP
+
+    def __fetchJournalTitleAbbreviationProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__jtaP:
+            cachePath = os.path.join(cachePath, cfgOb.get("CITATION_REFERENCE_CACHE_DIR", sectionName=configName))
+            self.__jtaP = JournalTitleAbbreviationProvider(cachePath=cachePath, useCache=useCache, **kwargs)
+        return self.__jtaP
+
+    def __fetchTaxonomyProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__taxU:
+            taxonomyDataPath = os.path.join(cachePath, cfgOb.get("NCBI_TAXONOMY_CACHE_DIR", sectionName=configName))
+            self.__taxU = TaxonomyProvider(taxDirPath=taxonomyDataPath, useCache=useCache, **kwargs)
+        return self.__taxU
+
+    def __fetchScopProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__scopU:
+            structDomainDataPath = os.path.join(cachePath, cfgOb.get("STRUCT_DOMAIN_CLASSIFICATION_CACHE_DIR", sectionName=configName))
+            self.__scopU = ScopClassificationProvider(scopDirPath=structDomainDataPath, useCache=useCache, **kwargs)
+        return self.__scopU
+
+    def __fetchCathProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__cathU:
+            structDomainDataPath = os.path.join(cachePath, cfgOb.get("STRUCT_DOMAIN_CLASSIFICATION_CACHE_DIR", sectionName=configName))
+            self.__cathU = CathClassificationProvider(cathDirPath=structDomainDataPath, useCache=useCache, **kwargs)
+        return self.__cathU
+
+    def __fetchEnzymeProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__ecU:
+            enzymeDataPath = os.path.join(cachePath, cfgOb.get("ENZYME_CLASSIFICATION_CACHE_DIR", sectionName=configName))
+            self.__ecU = EnzymeDatabaseProvider(enzymeDirPath=enzymeDataPath, useCache=useCache, **kwargs)
+        return self.__ecU
+
+    #
+    def __fetchDrugBankProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        _ = cfgOb
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__dbU:
+            # If a mock DrugBank URL is provided add this as an argument.
+            mockUrlTarget = cfgOb.getPath("DRUGBANK_MOCK_URL_TARGET", sectionName=configName)
+            if mockUrlTarget:
+                kwargs["urlTarget"] = mockUrlTarget
+                logger.info("Using mock DrugBank source file %r", mockUrlTarget)
+            un = cfgOb.get("_DRUGBANK_AUTH_USERNAME", sectionName=configName)
+            pw = cfgOb.get("_DRUGBANK_AUTH_PASSWORD", sectionName=configName)
+            self.__dbU = DrugBankProvider(cachePath=cachePath, useCache=useCache, username=un, password=pw, **kwargs)
+        return self.__dbU
+
+    #
+    def __fetchResidProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        _ = cfgOb
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__residU:
+            # dbDataPath = os.path.join(cachePath, cfgOb.get("RESID_CACHE_DIR", sectionName=configName))
+            self.__residU = ResidProvider(cachePath=cachePath, useCache=useCache, **kwargs)
+        return self.__residU
+
+    def __fetchPsiModProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        _ = cfgOb
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__psimodU:
+            self.__psimodU = PsiModProvider(cachePath=cachePath, useCache=useCache, **kwargs)
+        return self.__psimodU
+
+    def __fetchChemCompModelProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        _ = cfgOb
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__ccmU:
+            # dirPath = os.path.join(cachePath, cfgOb.get("CHEM_COMP_CACHE_DIR", sectionName=configName))
+            self.__ccmU = ChemCompModelProvider(cachePath=cachePath, useCache=useCache, **kwargs)
+        return self.__ccmU
+
+    def __fetchChemCompProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        _ = cfgOb
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__ccU:
+            # dirPath = os.path.join(cachePath, cfgOb.get("CHEM_COMP_CACHE_DIR", sectionName=configName))
+            self.__ccU = ChemCompProvider(cachePath=cachePath, useCache=useCache, **kwargs)
+        return self.__ccU
+
+    def __fetchAtcProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        _ = cfgOb
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__atcP:
+            # dirPath = os.path.join(cachePath, cfgOb.get("ATC_CACHE_DIR", sectionName=configName))
+            self.__atcP = AtcProvider(cachePath=cachePath, useCache=useCache, **kwargs)
+        return self.__atcP
+
+    def __fetchSiftsSummaryProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__ssP:
+            srcDirPath = os.path.join(cachePath, cfgOb.getPath("SIFTS_SUMMARY_DATA_PATH", sectionName=configName))
+            cacheDirPath = os.path.join(cachePath, cfgOb.get("SIFTS_SUMMARY_CACHE_DIR", sectionName=configName))
+            logger.debug("ssP %r %r", srcDirPath, cacheDirPath)
+            self.__ssP = SiftsSummaryProvider(srcDirPath=srcDirPath, cacheDirPath=cacheDirPath, useCache=useCache, abbreviated=self.__siftsAbbreviated, **kwargs)
+            logger.debug("ssP entry count %d", self.__ssP.getEntryCount())
+        return self.__ssP
+
+    def __fetchValidationProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__vrptP:
+            urlTarget = cfgOb.get("VRPT_DICT_MAPPING_LOCATOR", sectionName=configName)
+            dirPath = os.path.join(cachePath, cfgOb.get("DICTIONARY_CACHE_DIR", sectionName=configName))
+            self.__vrptP = ValidationReportProvider(dirPath=dirPath, urlTarget=urlTarget, useCache=useCache)
+        #
+        return self.__vrptP
+
+    def __fetchCommonUtils(self, cfgOb, configName, cachePath, useCache=None, **kwargs):
+        logger.debug("configName %s cachePath %r kwargs %r", configName, cachePath, kwargs)
+        _ = cfgOb
+        _ = useCache
+        if not self.__commonU:
+            self.__commonU = DictMethodCommonUtils(**kwargs)
+        return self.__commonU
+
+    def __fetchDictionaryApi(self, cfgOb, configName, cachePath, useCache=None, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        schemaName = kwargs.get("schemaName", "pdbx_core")
+        self.__dApiW = DictionaryApiProviderWrapper(cfgOb, cachePath, useCache=useCache)
+        dictApi = self.__dApiW.getApiByName(schemaName)
+        # numRev = dictApi.getDictionaryRevisionCount()
+        return dictApi
+
+    def __fetchPubChemProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__pcP:
+            #
+            try:
+                minCount = 0
+                userName = cfgOb.get("_STASH_AUTH_USERNAME", sectionName=configName)
+                password = cfgOb.get("_STASH_AUTH_PASSWORD", sectionName=configName)
+                basePath = cfgOb.get("_STASH_SERVER_BASE_PATH", sectionName=configName)
+                url = cfgOb.get("STASH_SERVER_URL", sectionName=configName)
+                urlFallBack = cfgOb.get("STASH_SERVER_FALLBACK_URL", sectionName=configName)
+                #
+                pcP = PubChemProvider(cachePath=cachePath, useCache=useCache)
+                ok = pcP.fromStash(url, basePath, userName=userName, password=password)
+                ok = pcP.reload()
+                ok = pcP.testCache(minCount=10)
+                if not ok:
+                    ok = pcP.fromStash(urlFallBack, basePath, userName=userName, password=password)
+                    ok = pcP.testCache(minCount=minCount)
+                #
+                if pcP:
+                    self.__pcP = pcP
+                    riD = pcP.getIdentifiers()
+                    logger.info("Fetched PubChem mapping dictionary (%d)", len(riD))
+            except Exception as e:
+                logger.exception("Failing with %s", str(e))
+        #
+        return self.__pcP
+
+    def __fetchPharosProvider(self, cfgOb, configName, cachePath, useCache=True, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        if not self.__phP:
+            # --
+            try:
+                minCount = 0
+                userName = cfgOb.get("_STASH_AUTH_USERNAME", sectionName=configName)
+                password = cfgOb.get("_STASH_AUTH_PASSWORD", sectionName=configName)
+                basePath = cfgOb.get("_STASH_SERVER_BASE_PATH", sectionName=configName)
+                url = cfgOb.get("STASH_SERVER_URL", sectionName=configName)
+                urlFallBack = cfgOb.get("STASH_SERVER_FALLBACK_URL", sectionName=configName)
+                #
+                phP = PharosProvider(cachePath=cachePath, useCache=useCache)
+                ok = phP.fromStash(url, basePath, userName=userName, password=password)
+                ok = phP.reload()
+                ok = phP.testCache(minCount=10)
+                if not ok:
+                    ok = phP.fromStash(urlFallBack, basePath, userName=userName, password=password)
+                    ok = phP.testCache(minCount=minCount)
+                #
+                if phP:
+                    self.__phP = phP
+                    riD = phP.getIdentifiers()
+                    logger.info("Fetched Pharos ChEMBL identifiers (%d)", len(riD))
+            except Exception as e:
+                logger.warning("Failing with %s", str(e))
+        #
+        return self.__phP
+
+    def __fetchRcsbLigandScoreProvider(self, cfgOb, configName, cachePath, useCache=None, **kwargs):
+        logger.debug("configName %s cachePath %s kwargs %r", configName, cachePath, kwargs)
+        _ = cfgOb
+        if not self.__rlsP:
+            self.__rlsP = RcsbLigandScoreProvider(cachePath=cachePath, useCache=useCache)
+        return self.__rlsP
diff --git a/rcsb/utils/dictionary/DictMethodRunner.py b/rcsb/utils/dictionary/DictMethodRunner.py
new file mode 100644
index 0000000..64d411f
--- /dev/null
+++ b/rcsb/utils/dictionary/DictMethodRunner.py
@@ -0,0 +1,227 @@
+##
+# File:    DictMethodRunner.py
+# Author:  J. Westbrook
+# Date:    18-Aug-2018
+# Version: 0.001 Initial version
+#
+# Updates:
+#  12-Nov-2018 jdw Run block methods after category and attribute methods.
+#   5-Jun-2019 jdw Refactor and generalize and remove dependencies on rcsb.db package
+#  17-Jul-2019 jdw Propagate kwargs to __getModuleInstance()
+#
+##
+"""
+Manage the invocation of dictionary methods implemented in helper classes.
+
+"""
+__docformat__ = "restructuredtext en"
+__author__ = "John Westbrook"
+__email__ = "jwest@rcsb.rutgers.edu"
+__license__ = "Apache 2.0"
+
+import logging
+import sys
+from operator import itemgetter
+
+logger = logging.getLogger(__name__)
+
+
+class DictMethodRunner(object):
+    """Manage the invocation of dictionary methods implemented as class methods."""
+
+    def __init__(self, dictionaryApi, modulePathMap=None, **kwargs):
+        """Manage invocation of dictionary methods referenced in external modules.
+
+        Arguments:
+            dictionaryApi {object} -- instance of DictionaryApi() for dictionary with target method definitions
+
+        Keyword Arguments:
+            modulePathMap {dict str} -- mapping between dictionary module path and execution path (default: {None})
+            cacheModuleFlag {bool} -- flag to cache module instances (default: True)
+            implementationSource {str} -- method implementation source (default: 'reference')
+            methodCodes (list str) -- filter methods by codes (default: ['calculation'])
+        """
+        self.__dApi = dictionaryApi
+        self.__modulePathMap = modulePathMap if modulePathMap else {}
+        self.__cacheModuleFlag = kwargs.get("cacheModuleFlag", True)
+        methodCodes = kwargs.get("methodCodes", ["calculation"])
+        implementationSource = kwargs.get("implementationCodes", "reference")
+        #
+        self.__kwargs = kwargs
+        #
+        # Preserve and reuse the module instances if caching is enabled
+        self.__moduleCache = {}
+        #
+        self.__methodD = self.__getMethodInfo(implementationSource=implementationSource, methodCodes=methodCodes)
+        logger.debug("Method index %r", self.__methodD.items())
+
+    def __getMethodInfo(self, implementationSource="reference", methodCodes=None):
+        """Get method implementation with the input implementation source."""
+        methodCodes = methodCodes if methodCodes else ["calculation"]
+        methodD = {}
+        try:
+            methodIndex = self.__dApi.getMethodIndex()
+            for _, mrL in methodIndex.items():
+                for mr in mrL:
+                    mId = mr.getId()
+                    catName = mr.getCategoryName()
+                    atName = mr.getAttributeName()
+                    mType = mr.getType()
+                    if (catName, atName) not in methodD:
+                        methodD[(catName, atName)] = []
+                    methDef = self.__dApi.getMethod(mId)
+                    logger.debug("Category %s attribute %s mId %r type %r methDef %r", catName, atName, mId, mType, methDef)
+                    mSource = methDef.getImplementationSource()
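+                    # Retain only method definitions matching the requested implementation
+                    # source and method codes.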
+                    mCode = methDef.getCode()
+                    if mSource == implementationSource and mCode in methodCodes:
+                        mPriority = methDef.getPriority()
+                        mLang = methDef.getLanguage()
+                        mImplement = methDef.getImplementation()
+                        dD = {"METHOD_LANGUAGE": mLang, "METHOD_IMPLEMENT": mImplement, "METHOD_TYPE": mType, "METHOD_CODE": mCode, "METHOD_PRIORITY": mPriority}
+                        methodD[(catName, atName)].append(dD)
+            #
+        except Exception as e:
+            logger.exception("Failing with %s", str(e))
+        #
+        logger.debug("Method dictionary %r", methodD)
+        return methodD
+
+    def __invokeAttributeMethod(self, methodPath, dataContainer, catName, atName, **kwargs):
+        """Invoke the input attribute method."""
+        ok = False
+        try:
+            modulePath, methodName = self.__methodPathSplit(methodPath)
+            mObj = self.__getModuleInstance(modulePath, **kwargs)
+            theMeth = getattr(mObj, methodName, None)
+            ok = theMeth(dataContainer, catName, atName, **kwargs)
+        except Exception as e:
+            logger.exception("Failed invoking attribute %s %s method %r with %s", catName, atName, methodPath, str(e))
+        return ok
+
+    def __invokeCategoryMethod(self, methodPath, dataContainer, catName, **kwargs):
+        """Invoke the input category method."""
+        ok = False
+        try:
+            modulePath, methodName = self.__methodPathSplit(methodPath)
+            mObj = self.__getModuleInstance(modulePath, **kwargs)
+            theMeth = getattr(mObj, methodName, None)
+            ok = theMeth(dataContainer, catName, **kwargs)
+        except Exception as e:
+            logger.exception("Failed invoking category %s method %r with %s", catName, methodPath, str(e))
+        return ok
+
+    def __invokeDatablockMethod(self, methodPath, dataContainer, blockName, **kwargs):
+        """Invoke the input data block method."""
+        ok = False
+        try:
+            modulePath, methodName = self.__methodPathSplit(methodPath)
+            mObj = self.__getModuleInstance(modulePath, **kwargs)
+            theMeth = getattr(mObj, methodName, None)
+            ok = theMeth(dataContainer, blockName, **kwargs)
+        except Exception as e:
+            logger.exception("Failed invoking block %s method %r with %s", blockName, methodPath, str(e))
+        return ok
+
+    def apply(self, dataContainer):
+        """Apply category, attribute and block level dictionary methods on the input data container."""
+        kwargs = self.__kwargs
+        mTupL = self.__getCategoryMethods()
+        logger.debug("Category methods %r", mTupL)
+        for catName, _, methodPath, _ in mTupL:
+            self.__invokeCategoryMethod(methodPath, dataContainer, catName, **kwargs)
+
+        mTupL = self.__getAttributeMethods()
+        logger.debug("Attribute methods %r", mTupL)
+        for catName, atName, methodPath, _ in mTupL:
+            self.__invokeAttributeMethod(methodPath, dataContainer, catName, atName, **kwargs)
+
+        mTupL = self.__getDatablockMethods()
+        logger.debug("Datablock methods %r", mTupL)
+        for blockName, _, methodPath, _ in mTupL:
+            self.__invokeDatablockMethod(methodPath, dataContainer, blockName, **kwargs)
+
+        return True
+
+    def __getDatablockMethods(self):
+        mL = []
+        try:
+            for (dictName, _), mDL in self.__methodD.items():
+                for mD in mDL:
+                    if mD["METHOD_TYPE"].lower() == "datablock":
+                        methodPath = mD["METHOD_IMPLEMENT"]
+                        mL.append((dictName, None, methodPath, mD["METHOD_PRIORITY"]))
+            mL = sorted(mL, key=itemgetter(3))
+            return mL
+        except Exception as e:
+            logger.exception("Failing dictName %s with %s", dictName, str(e))
+        return mL
+
+    def __getCategoryMethods(self):
+        mL = []
+        try:
+            for (catName, _), mDL in self.__methodD.items():
+                for mD in mDL:
+                    if mD["METHOD_TYPE"].lower() == "category":
+                        methodPath = mD["METHOD_IMPLEMENT"]
+                        mL.append((catName, None, methodPath, mD["METHOD_PRIORITY"]))
+            mL = sorted(mL, key=itemgetter(3))
+            return mL
+        except Exception as e:
+            logger.exception("Failing catName %r with %s", catName, str(e))
+        return mL
+
+    def __getAttributeMethods(self):
+        mL = []
+        try:
+            for (catName, atName), mDL in self.__methodD.items():
+                for mD in mDL:
+                    if mD["METHOD_TYPE"].lower() == "attribute":
+                        methodPath = mD["METHOD_IMPLEMENT"]
+                        mL.append((catName, atName, methodPath, mD["METHOD_PRIORITY"]))
+            mL = sorted(mL, key=itemgetter(3))
+            return mL
+        except Exception as e:
+            logger.exception("Failing catName %s atName %s with %s", catName, atName, str(e))
+        return mL
+
+    def __methodPathSplit(self, methodPath):
+        """Extract the module path and the method name from the input path, optionally
+        remapping the module path.
+
+        Arguments:
+            methodPath {str} -- implementation path from dictionary method definition
+
+        Returns:
+            {tuple str} -- module path, method name
+        """
+        try:
+            # Strip off any leading path of the module from the method path.
+            mpL = str(methodPath).split(".")
+            methodName = mpL[-1]
+            tp = ".".join(mpL[:-1])
+            modulePath = self.__modulePathMap[tp] if tp in self.__modulePathMap else tp
+            return modulePath, methodName
+        except Exception as e:
+            logger.error("Failing for method path %r with %s", methodPath, str(e))
+        return None, None
+
+    def __getModuleInstance(self, modulePath, **kwargs):
+        #
+        if self.__cacheModuleFlag and modulePath in self.__moduleCache:
+            return self.__moduleCache[modulePath]
+        #
+        mObj = None
+        try:
+            aMod = __import__(modulePath, globals(), locals(), [""])
+            sys.modules[modulePath] = aMod
+            #
+            # Strip off any leading path to the module before we instantiate the module object.
+            mpL = str(modulePath).split(".")
+            moduleName = mpL[-1]
+            #
+            mObj = getattr(aMod, moduleName)(**kwargs)
+            self.__moduleCache[modulePath] = mObj
+
+        except Exception as e:
+            logger.error("Failing to instance helper %r with %s", modulePath, str(e))
+        return mObj
diff --git a/rcsb/utils/dictionary/DictionaryApiProvider.py b/rcsb/utils/dictionary/DictionaryApiProvider.py
new file mode 100644
index 0000000..253bdb5
--- /dev/null
+++ b/rcsb/utils/dictionary/DictionaryApiProvider.py
@@ -0,0 +1,113 @@
+##
+# File:    DictionaryApiProvider.py
+# Author:  J. Westbrook
+# Date:    3-Jun-2019
+# Version: 0.001 Initial version
+#
+# Updates:
+#  14-Aug-2019 jdw adding remote dictionary fetch and caching logic.
+##
+"""
+Resource provider for dictionary API.
+
+"""
+__docformat__ = "restructuredtext en"
+__author__ = "John Westbrook"
+__email__ = "jwest@rcsb.rutgers.edu"
+__license__ = "Apache 2.0"
+
+import logging
+import os
+
+from mmcif.api.DictionaryApi import DictionaryApi
+from rcsb.utils.io.FileUtil import FileUtil
+from rcsb.utils.io.MarshalUtil import MarshalUtil
+from rcsb.utils.io.SingletonClass import SingletonClass
+
+logger = logging.getLogger(__name__)
+
+
+class DictionaryApiProvider(SingletonClass):
+    """Resource provider for dictionary APIs."""
+
+    def __init__(self, dirPath, useCache=True):
+        """Resource provider for dictionary APIs.
+
+        Args:
+            dirPath (str): path to the directory containing cache files
+            useCache (bool, optional): flag to use cached files. Defaults to True.
+        """
+        self.__apiMap = {}
+        self.__dirPath = dirPath
+        self.__useCache = useCache
+        #
+        self.__fileU = FileUtil(workPath=self.__dirPath)
+        logger.debug("Leaving constructor")
+
+    def __reload(self, dictLocators, dirPath, useCache=True):
+        """Reload the local cache of dictionary resources and return a dictionary API instance.
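+
+        When useCache is False, any previously cached copies of the named
+        dictionaries are removed and fetched again.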
+
+        Args:
+            dictLocators (list, str): list of locators for dictionary resource files
+            dirPath (str): path to the directory containing cache files
+            useCache (bool, optional): flag to use cached files. Defaults to True.
+
+        Returns:
+            (object): instance of dictionary API
+        """
+        #
+        # verify the existence of the cache directory ...
+        self.__fileU.mkdir(dirPath)
+        if not useCache:
+            for dictLocator in dictLocators:
+                try:
+                    fn = self.__fileU.getFileName(dictLocator)
+                    os.remove(os.path.join(dirPath, fn))
+                except Exception:
+                    pass
+        #
+        ret = True
+        for dictLocator in dictLocators:
+            cacheFilePath = os.path.join(dirPath, self.__fileU.getFileName(dictLocator))
+            if useCache and self.__fileU.exists(cacheFilePath):
+                # nothing to do
+                continue
+            logger.debug("Fetching url %s caching in %s", dictLocator, cacheFilePath)
+            ok = self.__fileU.get(dictLocator, cacheFilePath)
+            ret = ret and ok
+        return ret
+
+    def getApi(self, dictLocators, **kwargs):
+        """Return a dictionary API object for the input dictionaries.
+
+        Arguments:
+            dictLocators {list str} -- list of dictionary locator paths
+
+        Returns:
+            [object] -- returns DictionaryApi() object for input dictionaries
+        """
+        dictFileNames = [self.__fileU.getFileName(dictLocator) for dictLocator in dictLocators]
+        dictTup = tuple(dictFileNames)
+        dApi = self.__apiMap[dictTup] if dictTup in self.__apiMap else self.__getApi(dictLocators, **kwargs)
+        self.__apiMap[dictTup] = dApi
+        return dApi
+
+    def __getApi(self, dictLocators, **kwargs):
+        """Return a dictionary API instance for the input dictionary locator list."""
+        consolidate = kwargs.get("consolidate", True)
+        replaceDefinition = kwargs.get("replaceDefinitions", True)
+        verbose = kwargs.get("verbose", True)
+        #
+        ok = self.__reload(dictLocators, self.__dirPath, useCache=self.__useCache)
+        #
+        dApi = None
+        if ok:
+            mU = MarshalUtil()
+            containerList = []
+            for dictLocator in dictLocators:
+                cacheFilePath = os.path.join(self.__dirPath, self.__fileU.getFileName(dictLocator))
+                containerList.extend(mU.doImport(cacheFilePath, fmt="mmcif-dict"))
+            #
+            dApi = DictionaryApi(containerList=containerList, consolidate=consolidate, replaceDefinition=replaceDefinition, verbose=verbose)
+        return dApi
diff --git a/rcsb/utils/dictionary/DictionaryApiProviderWrapper.py b/rcsb/utils/dictionary/DictionaryApiProviderWrapper.py
new file mode 100644
index 0000000..068ce1a
--- /dev/null
+++ b/rcsb/utils/dictionary/DictionaryApiProviderWrapper.py
@@ -0,0 +1,74 @@
+##
+# File:    DictionaryApiProviderWrapper.py
+# Author:  J. Westbrook
+# Date:    18-Aug-2019
+# Version: 0.001 Initial version
+#
+# Updates:
+#
+##
+"""
+Wrapper for the dictionary API provider.
+
+"""
+__docformat__ = "restructuredtext en"
+__author__ = "John Westbrook"
+__email__ = "jwest@rcsb.rutgers.edu"
+__license__ = "Apache 2.0"
+
+import logging
+import os.path
+
+from rcsb.utils.dictionary.DictionaryApiProvider import DictionaryApiProvider
+from rcsb.utils.io.SingletonClass import SingletonClass
+
+logger = logging.getLogger(__name__)
+
+
+class DictionaryApiProviderWrapper(SingletonClass):
+    """Wrapper for the dictionary API provider."""
+
+    def __init__(self, cfgOb, cachePath, useCache=True, **kwargs):
+        """Wrapper for the dictionary API provider.
+
+        Args:
+            cfgOb (object): ConfigInfo() object instance
+            cachePath (str): top path to contain the dictionary cache directory
+            useCache (bool, optional): flag to use cached files. Defaults to True.
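+
+        Dictionary locators for each database schema are resolved through the
+        DICT_LOCATOR_CONFIG_MAP of the content configuration section.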
+ + """ + self.__cfgOb = cfgOb + self.__configName = self.__cfgOb.getDefaultSectionName() + self.__contentInfoConfigName = "content_info_helper_configuration" + self.__dictLocatorMap = self.__cfgOb.get("DICT_LOCATOR_CONFIG_MAP", sectionName=self.__contentInfoConfigName) + dirPath = os.path.join(cachePath, self.__cfgOb.get("DICTIONARY_CACHE_DIR", sectionName=self.__configName)) + self.__dP = DictionaryApiProvider(dirPath, useCache=useCache, **kwargs) + logger.debug("Leaving constructor") + + def getApiByLocators(self, dictLocators, **kwargs): + """Return a dictionary API object for the input dictionary locator list. + + Args: + dictLocators (list str): list of dictionary locators + + Returns: + (object): Instance of DictionaryApi() + """ + return self.__dP.getApi(dictLocators, **kwargs) + + def getApiByName(self, databaseName, **kwargs): + """Return a dictionary API object for the input schema name. + + Args: + databaseName (str): database schema name + + Returns: + (object): Instance of DictionaryApi() + """ + if databaseName not in self.__dictLocatorMap: + logger.error("Missing dictionary locator configuration for database schema %s", databaseName) + dictLocators = [] + else: + dictLocators = [self.__cfgOb.getPath(configLocator, sectionName=self.__configName) for configLocator in self.__dictLocatorMap[databaseName]] + # + return self.__dP.getApi(dictLocators, **kwargs) diff --git a/rcsb/utils/dictionary/__init__.py b/rcsb/utils/dictionary/__init__.py new file mode 100644 index 0000000..7a42a5d --- /dev/null +++ b/rcsb/utils/dictionary/__init__.py @@ -0,0 +1,5 @@ +__docformat__ = "restructuredtext en" +__author__ = "John Westbrook" +__email__ = "john.westbrook@rcsb.org" +__license__ = "Apache 2.0" +__version__ = "0.11" diff --git a/rcsb/utils/tests-dictionary/__init__.py b/rcsb/utils/tests-dictionary/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rcsb/utils/tests-dictionary/testDictMethodRunner.py b/rcsb/utils/tests-dictionary/testDictMethodRunner.py new file mode 100644 index 0000000..151db78 --- /dev/null +++ b/rcsb/utils/tests-dictionary/testDictMethodRunner.py @@ -0,0 +1,129 @@ +# File: DictMethodRunnerTests.py +# Author: J. Westbrook +# Date: 18-Aug-2018 +# Version: 0.001 +# +# Update: +# 12-Nov-2018 jdw add chemical component and bird chemical component tests +# 5-Jun-2019 jdw revise for new method runner api +# 16-Jul-2019 jdw remove schema processing. +## +""" +Tests for applying dictionary methods defined as references to helper plugin methods . 
+ +""" + +__docformat__ = "restructuredtext en" +__author__ = "John Westbrook" +__email__ = "jwest@rcsb.rutgers.edu" +__license__ = "Apache 2.0" + +import logging +import os +import time +import unittest + +from mmcif.api.DictMethodRunner import DictMethodRunner +from rcsb.utils.dictionary.DictionaryApiProviderWrapper import DictionaryApiProviderWrapper +from rcsb.utils.dictionary.DictMethodResourceProvider import DictMethodResourceProvider +from rcsb.utils.repository.RepositoryProvider import RepositoryProvider +from rcsb.utils.config.ConfigUtil import ConfigUtil +from rcsb.utils.io.MarshalUtil import MarshalUtil + +logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +HERE = os.path.abspath(os.path.dirname(__file__)) +TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) + + +class DictMethodRunnerTests(unittest.TestCase): + def setUp(self): + self.__numProc = 2 + self.__fileLimit = 200 + mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") + self.__cachePath = os.path.join(TOPDIR, "CACHE") + configPath = os.path.join(mockTopPath, "config", "dbload-setup-example.yml") + configName = "site_info_configuration" + self.__configName = configName + self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath) + self.__mU = MarshalUtil(workPath=self.__cachePath) + self.__rpP = RepositoryProvider(cfgOb=self.__cfgOb, numProc=self.__numProc, fileLimit=self.__fileLimit, cachePath=self.__cachePath) + # + self.__testCaseList = [ + {"contentType": "pdbx_core", "mockLength": 50, "mergeContent": ["vrpt"]}, + {"contentType": "bird_chem_comp_core", "mockLength": 17, "mergeContent": None}, + ] + # + self.__modulePathMap = self.__cfgOb.get("DICT_METHOD_HELPER_MODULE_PATH_MAP", sectionName=configName) + # + self.__startTime = time.time() + logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime())) + + def tearDown(self): + endTime = time.time() + logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime) + + def __runContentType(self, contentType, mockLength, mergeContent): + """Read and process test fixture data files from the input content type.""" + try: + dP = DictionaryApiProviderWrapper(self.__cfgOb, self.__cachePath, useCache=True) + dictApi = dP.getApiByName(contentType) + rP = DictMethodResourceProvider(self.__cfgOb, configName=self.__configName, cachePath=self.__cachePath, siftsAbbreviated="TEST") + dmh = DictMethodRunner(dictApi, modulePathMap=self.__modulePathMap, resourceProvider=rP) + locatorObjList = self.__rpP.getLocatorObjList(contentType=contentType, mergeContentTypes=mergeContent) + containerList = self.__rpP.getContainerList(locatorObjList) + # + logger.debug("Length of locator list %d\n", len(locatorObjList)) + self.assertGreaterEqual(len(locatorObjList), mockLength) + for container in containerList: + cName = container.getName() + logger.debug("Processing container %s", cName) + dmh.apply(container) + savePath = os.path.join(HERE, "test-output", cName + "-with-method.cif") + self.__mU.doExport(savePath, [container], fmt="mmcif") + + except Exception as e: + logger.exception("Failing with %s", str(e)) + self.fail() + + def testMethodRunner(self): + """Test method runner for multiple content types.""" + for tD in self.__testCaseList: + self.__runContentType(tD["contentType"], 
tD["mockLength"], tD["mergeContent"]) + + def testMethodRunnerSetup(self): + """Test the setup methods for method runner class""" + try: + dP = DictionaryApiProviderWrapper(self.__cfgOb, self.__cachePath, useCache=True) + dictApi = dP.getApiByName("pdbx") + rP = DictMethodResourceProvider(self.__cfgOb, configName=self.__configName, cachePath=self.__cachePath, siftsAbbreviated="TEST") + dmh = DictMethodRunner(dictApi, modulePathMap=self.__modulePathMap, resourceProvider=rP) + ok = dmh is not None + self.assertTrue(ok) + + except Exception as e: + logger.exception("Failing with %s", str(e)) + self.fail() + + +def dictMethodRunnerSuite(): + suiteSelect = unittest.TestSuite() + suiteSelect.addTest(DictMethodRunnerTests("testMethodRunner")) + return suiteSelect + + +def dictMethodRunnerSetupSuite(): + suiteSelect = unittest.TestSuite() + suiteSelect.addTest(DictMethodRunnerTests("testMethodRunnerSetup")) + return suiteSelect + + +if __name__ == "__main__": + + mySuite = dictMethodRunnerSetupSuite() + unittest.TextTestRunner(verbosity=2).run(mySuite) + + mySuite = dictMethodRunnerSuite() + unittest.TextTestRunner(verbosity=2).run(mySuite) diff --git a/rcsb/utils/tests-dictionary/testDictionaryApiProvider.py b/rcsb/utils/tests-dictionary/testDictionaryApiProvider.py new file mode 100644 index 0000000..03997cb --- /dev/null +++ b/rcsb/utils/tests-dictionary/testDictionaryApiProvider.py @@ -0,0 +1,87 @@ +## +# File: testDictionaryApiProvider.py +# Author: J. Westbrook +# Date: 15-Aug-2019 +# Version: 0.001 +# +# Update: + +## +""" +Tests for dictionary API provider and cache. + +""" + +__docformat__ = "restructuredtext en" +__author__ = "John Westbrook" +__email__ = "jwest@rcsb.rutgers.edu" +__license__ = "Apache 2.0" + +import logging +import os +import time +import unittest + +from rcsb.utils.dictionary.DictionaryApiProvider import DictionaryApiProvider +from rcsb.utils.config.ConfigUtil import ConfigUtil + +logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +HERE = os.path.abspath(os.path.dirname(__file__)) +TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) + + +class DictionaryProviderTests(unittest.TestCase): + def setUp(self): + mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") + self.__cachePath = os.path.join(TOPDIR, "CACHE") + self.__dirPath = os.path.join(self.__cachePath, "dictionaries") + configPath = os.path.join(mockTopPath, "config", "dbload-setup-example.yml") + configName = "site_info_configuration" + self.__configName = configName + self.__contentInfoConfigName = "content_info_helper_configuration" + self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath) + dictLocatorMap = self.__cfgOb.get("DICT_LOCATOR_CONFIG_MAP", sectionName=self.__contentInfoConfigName) + schemaName = "pdbx_core" + self.__dictLocators = [self.__cfgOb.getPath(configLocator, sectionName=self.__configName) for configLocator in dictLocatorMap[schemaName]] + # + self.__startTime = time.time() + logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime())) + + def tearDown(self): + endTime = time.time() + logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime) + + def testResourceCache(self): + """Test case - generate and check dictonary artifact and api caches""" + try: + logger.debug("Dictionary 
locators %r", self.__dictLocators) + dp = DictionaryApiProvider(dirPath=self.__dirPath, useCache=False) + dApi = dp.getApi(self.__dictLocators) + ok = dApi.testCache() + self.assertTrue(ok) + title = dApi.getDictionaryTitle() + logger.debug("Title %r", title) + self.assertEqual(title, "mmcif_pdbx.dic,rcsb_mmcif_ext.dic,vrpt_mmcif_ext.dic") + # revL = dApi.getDictionaryHistory() + numRev = dApi.getDictionaryRevisionCount() + logger.debug("Number of dictionary revisions (numRev) %r", numRev) + self.assertGreater(numRev, 220) + # + except Exception as e: + logger.exception("Failing with %s", str(e)) + self.fail() + + +def dictionaryProviderSuite(): + suiteSelect = unittest.TestSuite() + suiteSelect.addTest(DictionaryProviderTests("testResourceCache")) + return suiteSelect + + +if __name__ == "__main__": + + mySuite = dictionaryProviderSuite() + unittest.TextTestRunner(verbosity=2).run(mySuite) diff --git a/rcsb/utils/tests-dictionary/testDictionaryApiProviderWrapper.py b/rcsb/utils/tests-dictionary/testDictionaryApiProviderWrapper.py new file mode 100644 index 0000000..2758c27 --- /dev/null +++ b/rcsb/utils/tests-dictionary/testDictionaryApiProviderWrapper.py @@ -0,0 +1,106 @@ +## +# File: testDictionaryApiProviderWrapper.py +# Author: J. Westbrook +# Date: 15-Aug-2019 +# Version: 0.001 +# +# Update: + +## +""" +Tests for dictionary API provider wrapper. + +""" + +__docformat__ = "restructuredtext en" +__author__ = "John Westbrook" +__email__ = "jwest@rcsb.rutgers.edu" +__license__ = "Apache 2.0" + +import logging +import os +import time +import unittest + +from rcsb.utils.dictionary.DictionaryApiProviderWrapper import DictionaryApiProviderWrapper +from rcsb.utils.config.ConfigUtil import ConfigUtil + +logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s") +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +HERE = os.path.abspath(os.path.dirname(__file__)) +TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE))) + + +class DictionaryProviderTests(unittest.TestCase): + def setUp(self): + mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") + self.__cachePath = os.path.join(TOPDIR, "CACHE") + configPath = os.path.join(mockTopPath, "config", "dbload-setup-example.yml") + configName = "site_info_configuration" + self.__configName = configName + self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath) + # + self.__contentInfoConfigName = "content_info_helper_configuration" + dictLocatorMap = self.__cfgOb.get("DICT_LOCATOR_CONFIG_MAP", sectionName=self.__contentInfoConfigName) + self.__databaseName = "pdbx_core" + self.__dictLocators = [self.__cfgOb.getPath(configLocator, sectionName=self.__configName) for configLocator in dictLocatorMap[self.__databaseName]] + # + self.__startTime = time.time() + logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime())) + + def tearDown(self): + endTime = time.time() + logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime) + + def testWrapperByName(self): + """Test case - get dictionary API by schema name""" + try: + dp = DictionaryApiProviderWrapper(self.__cfgOb, self.__cachePath, useCache=False) + dApi = dp.getApiByName(self.__databaseName) + ok = dApi.testCache() + self.assertTrue(ok) + title = dApi.getDictionaryTitle() + logger.debug("Title %r", title) + self.assertEqual(title, 
"mmcif_pdbx.dic,rcsb_mmcif_ext.dic,vrpt_mmcif_ext.dic") + # revL = dApi.getDictionaryHistory() + numRev = dApi.getDictionaryRevisionCount() + logger.debug("Number of dictionary revisions (numRev) %r", numRev) + self.assertGreater(numRev, 220) + # + except Exception as e: + logger.exception("Failing with %s", str(e)) + self.fail() + + def testWrapperByLocators(self): + """Test case - get dictionary API by locator list""" + try: + dp = DictionaryApiProviderWrapper(self.__cfgOb, self.__cachePath, useCache=False) + dApi = dp.getApiByLocators(self.__dictLocators) + ok = dApi.testCache() + self.assertTrue(ok) + title = dApi.getDictionaryTitle() + logger.debug("Title %r", title) + self.assertEqual(title, "mmcif_pdbx.dic,rcsb_mmcif_ext.dic,vrpt_mmcif_ext.dic") + # revL = dApi.getDictionaryHistory() + numRev = dApi.getDictionaryRevisionCount() + logger.debug("Number of dictionary revisions (numRev) %r", numRev) + self.assertGreater(numRev, 220) + # + except Exception as e: + logger.exception("Failing with %s", str(e)) + self.fail() + + +def dictionaryProviderSuite(): + suiteSelect = unittest.TestSuite() + suiteSelect.addTest(DictionaryProviderTests("testWrapperByName")) + suiteSelect.addTest(DictionaryProviderTests("testWrapperByLocators")) + return suiteSelect + + +if __name__ == "__main__": + + mySuite = dictionaryProviderSuite() + unittest.TextTestRunner(verbosity=2).run(mySuite) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bc657bb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +mmcif >= 0.57 +rcsb.utils.config >= 0.35 +rcsb.utils.io >= 0.97 +rcsb.utils.multiproc >= 0.17 +rcsb.utils.chemref >= 0.68 +rcsb.utils.citation >= 0.15 +rcsb.utils.validation >= 0.22 +rcsb.utils.ec >= 0.21 +rcsb.utils.taxonomy >= 0.32 +rcsb.utils.seq >= 0.43 +rcsb.utils.struct >= 0.26 diff --git a/setup.cfg b/setup.cfg new file mode 100755 index 0000000..53bb53d --- /dev/null +++ b/setup.cfg @@ -0,0 +1,7 @@ +[bdist_wheel] +# use py2.py3 tag for pure-python dist: +universal=1 + +[metadata] +description-file = README.md + diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..f900941 --- /dev/null +++ b/setup.py @@ -0,0 +1,71 @@ +# File: setup.py +# Date: 14-Feb-2021 +# +# Update: +# +import re + +from setuptools import find_packages +from setuptools import setup + +packages = [] +thisPackage = "rcsb.utils.dictionary" + +with open("rcsb/utils/dictionary/__init__.py", "r") as fd: + version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', fd.read(), re.MULTILINE).group(1) + +if not version: + raise RuntimeError("Cannot find version information") + +setup( + name=thisPackage, + version=version, + description="RCSB Python Dictionary Utility Classes", + long_description="See: README.md", + author="John Westbrook", + author_email="john.westbrook@rcsb.org", + url="https://github.com/rcsb/py-rcsb_utils_dictionary", + # + license="Apache 2.0", + classifiers=( + "Development Status :: 3 - Alpha", + # 'Development Status :: 5 - Production/Stable', + "Intended Audience :: Developers", + "Natural Language :: English", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + ), + entry_points={}, + # + install_requires=[ + "mmcif >= 0.57", + "rcsb.utils.io >= 0.97", + "rcsb.utils.config >= 0.35", + "rcsb.utils.multiproc >= 0.17", + "rcsb.utils.validation >= 0.22", + "rcsb.utils.chemref >= 0.68", + "rcsb.utils.citation >= 0.15", + "rcsb.utils.ec >= 
+        "rcsb.utils.taxonomy >= 0.32",
+        "rcsb.utils.seq >= 0.43",
+        "rcsb.utils.struct >= 0.26",
+    ],
+    packages=find_packages(exclude=["rcsb.utils.tests-dictionary", "rcsb.utils.tests-*", "tests.*"]),
+    package_data={
+        # If any package contains *.md or *.rst ... files, include them:
+        "": ["*.md", "*.rst", "*.txt", "*.cfg"]
+    },
+    #
+    # These basic tests require no database services -
+    test_suite="rcsb.utils.tests-dictionary",
+    tests_require=["tox"],
+    #
+    # Not configured ...
+    extras_require={"dev": ["check-manifest"], "test": ["coverage"]},
+    # Added for the Sphinx documentation build -
+    command_options={"build_sphinx": {"project": ("setup.py", thisPackage), "version": ("setup.py", version), "release": ("setup.py", version)}},
+    # This setting for namespace package support -
+    zip_safe=False,
+)
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..341716b
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,170 @@
+# File: tox.ini (Templated version)
+#
+[local_settings]
+# Project-specific local settings
+test_pattern = "test*.py"
+#
+# Source paths (unquoted, space-separated list of files/directories) for linting and format checks
+source_paths = rcsb/utils/dictionary rcsb/utils/tests-dictionary setup.py
+#
+# Start directory path for test discovery
+# Each path must reference a valid directory that is searchable by Python (i.e. contains __init__.py)
+# ** It is harmless to point to paths containing no tests.
+#
+test_path_1 = "rcsb/utils/tests-dictionary"
+# These are placeholders - valid source directories without test files
+test_path_2 = "rcsb/utils/dictionary"
+test_path_3 = "rcsb/utils/dictionary"
+test_path_4 = "rcsb/utils/dictionary"
+#
+# Comma-separated list of directories for which test coverage will be evaluated
+coverage_source_paths = "rcsb/utils/dictionary,rcsb/utils/tests-dictionary"
+coverage_exclude_paths = "rcsb/utils/__init__.py"
+coverage_cutoff = 50
+#
+#
+## --------------------------------------------------------------------------
+## ----------   No project-specific settings beyond this point  --------------
+# Updated:  3-Jul-2019 jdw Cleanup
+#           8-Jul-2019 jdw Disable flake8 plugin (pydocstyle compat issue)
+#          12-Jul-2019 jdw Add placeholders for up to four test paths
+#          13-Jul-2019 jdw Add coverage exclusion
+##
+[tox]
+# The complete list of supported test environments to set up and invoke
+envlist = format_pep8-{py39}, lint_pylint-{py39}, format_black-{py39}, py{39}
+#
+minversion = 3.7.0
+skip_missing_interpreters = true
+skipsdist = True
+
+[testenv]
+passenv = CONFIG_SUPPORT_TOKEN_ENV OE_LICENSE
+whitelist_externals = echo
+deps = echo
+commands =
+    echo "Starting default tests in testenv"
+
+
+[testenv:py39]
+description = 'Run unit tests (unittest runner) using {envpython}'
+whitelist_externals = echo
+skip_install = True
+sitepackages = True
+recreate = True
+alwayscopy = True
+usedevelop = True
+platform =
+    macos: darwin
+    linux: linux
+basepython =
+    py39: python3.9
+
+deps = echo
+    -r requirements.txt
+commands =
+    echo "Starting {envname} with {envpython}"
+    {envpython} -V
+    {envpython} -m unittest discover -v --start-directory {[local_settings]test_path_1} --pattern "{[local_settings]test_pattern}"
+    {envpython} -m unittest discover -v --start-directory {[local_settings]test_path_2} --pattern "{[local_settings]test_pattern}"
+    {envpython} -m unittest discover -v --start-directory {[local_settings]test_path_3} --pattern "{[local_settings]test_pattern}"
+    {envpython} -m unittest discover -v --start-directory {[local_settings]test_path_4} --pattern "{[local_settings]test_pattern}"
+    echo "Completed {envname} with {envpython}"

+#
+[testenv:format_pep8-py39]
+description = 'Run selected PEP8 compliance checks (flake8)'
+platform =
+    macos: darwin
+    linux: linux
+whitelist_externals =
+    echo
+    flake8
+basepython = py39: python3.9
+sitepackages = True
+deps =
+    echo
+    flake8
+    # This plugin is no longer compatible with the latest pydocstyle -
+    # flake8-docstrings>=0.2.7
+    flake8-import-order>=0.9
+    -r requirements.txt
+commands =
+    echo "Starting {envname}"
+    # Exceptions: D for docstrings, I for import order and formatting, E203 slice whitespace and W503 line breaks before binary operators (both incompatible with black)
+    flake8 --max-line-length=185 --ignore=D,I,E203,W503 {[local_settings]source_paths}
+    echo "Completed {envname}"

+#
+[testenv:lint_pylint-py39]
+description = 'Run linting compliance checks (pylint)'
+platform =
+    macos: darwin
+    linux: linux
+whitelist_externals =
+    echo
+    pylint
+basepython = py39: python3.9
+sitepackages = True
+deps =
+    echo
+    pylint
+    -r requirements.txt
+commands =
+    echo "Starting {envname}"
+    pylint --disable=R,C --reports=n --rcfile={toxinidir}/pylintrc {[local_settings]source_paths}
+    echo "Completed {envname}"

+#
+[testenv:format_black-py39]
+description = 'Run format compliance checks (black)'
+platform =
+    macos: darwin
+    linux: linux
+whitelist_externals =
+    echo
+    black
+basepython = py39: python3.9
+sitepackages = True
+deps =
+    echo
+    black>=20.8b
+    -r requirements.txt
+    # isort>=4.3.20
+commands =
+    echo "Starting {envname}"
+    black --check --line-length 180 {[local_settings]source_paths}
+    # isort -rc rcsb/utils --check-only
+    echo "Completed {envname}"

+#
+[testenv:test_coverage-py39]
+description = 'Run test coverage analysis'
+platform =
+    macos: darwin
+    linux: linux
+whitelist_externals =
+    echo
+    coverage
+basepython = py39: python3.9
+recreate = true
+alwayscopy = true
+usedevelop = true
+deps =
+    echo
+    coverage
+    -r requirements.txt

+commands =
+    echo "Starting {envname}"
+    coverage erase
+    coverage run --parallel-mode --omit="{[local_settings]coverage_exclude_paths}" --source="{[local_settings]coverage_source_paths}" -m unittest discover -v --start-directory {[local_settings]test_path_1} --pattern "{[local_settings]test_pattern}"
+    coverage run --parallel-mode --omit="{[local_settings]coverage_exclude_paths}" --source="{[local_settings]coverage_source_paths}" -m unittest discover -v --start-directory {[local_settings]test_path_2} --pattern "{[local_settings]test_pattern}"
+    coverage run --parallel-mode --omit="{[local_settings]coverage_exclude_paths}" --source="{[local_settings]coverage_source_paths}" -m unittest discover -v --start-directory {[local_settings]test_path_3} --pattern "{[local_settings]test_pattern}"
+    coverage run --parallel-mode --omit="{[local_settings]coverage_exclude_paths}" --source="{[local_settings]coverage_source_paths}" -m unittest discover -v --start-directory {[local_settings]test_path_4} --pattern "{[local_settings]test_pattern}"
+    echo " ------- Consolidating {envname} data ----------"
+    coverage combine
+    echo " ------- Building {envname} reports ----------"
+    coverage report --fail-under={[local_settings]coverage_cutoff}
+    - coverage xml
+    echo "Completed {envname}"
\ No newline at end of file