Merge pull request #11 from calacademy-research/picturae_import

shortened unittests function in sql_csv_utils and test_sql_tools.py
calacademy-research · Oct 2, 2023 · 3728c58 · 3728c58
2 parents 228266b + 97bbd82
commit 3728c58
Show file tree

Hide file tree

Showing 8 changed files with 80 additions and 184 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+**/tax_dump.sql
 **/run_picdb.sh
 **/dummy_cleaner.py
 **/picdb_config.py

diff --git a/image_client/picturae_importer.py b/image_client/picturae_importer.py
@@ -578,11 +578,10 @@ def create_collecting_event(self):
 
     def create_taxon(self):
         """create_taxon: populates the taxon table iteratively by adding higher taxa first,
-                            before lower taxa. Assigns taxa ranks and TaxonTreedefItemID.
-                        Using parent list in order to populate parent ids, by using the parsed
-                        rank levels of each taxon name.
+                         before lower taxa. Assigns taxa ranks and TaxonTreedefItemID.
+                         Using parent list in order to populate parent ids, by using the parsed
+                         rank levels of each taxon name.
         """
-        # for now do not upload
         self.parent_list = [self.full_name, self.first_intra, self.gen_spec, self.genus, self.family_name]
         self.parent_list = unique_ordered_list(self.parent_list)
         for index, taxon in reversed(list(enumerate(self.taxon_list))):
@@ -642,8 +641,6 @@ def create_collection_object(self):
                 args through create_sql_string and create_table record
                 in order to add new collectionobject record to database.
         """
-        # will new collecting event ids need to be created ?
-        # re-pulling collecting event id to reflect new record
 
         self.collecting_event_id = self.sql_csv_tools.get_one_match(tab_name='collectingevent',
                                                                     id_col='CollectingEventID',

diff --git a/image_client/sql_csv_utils.py b/image_client/sql_csv_utils.py
@@ -136,7 +136,6 @@ def insert_table_record(self, logger_int, sql):
         cursor = self.get_cursor()
         logger_int.info(f'running query: {sql}')
         logger_int.debug(sql)
-
         try:
             cursor.execute(sql)
         except Exception as e:

diff --git a/tests/casbotany_lite_creator.py b/tests/casbotany_lite_creator.py
@@ -388,9 +388,9 @@ def table_sql_list():
 
 
 def casbotany_lite_creator():
-    """casbotqny_lite_creator: casbotany_lite_creator: creates the
+    """casbotany_lite_creator: creates the
                  sqlite tables contained in the sqlite DDL list"""
-    connect = sqlite3.connect('tests/casbotany_lite.db')
+    connect = sqlite3.connect('casbotany_lite.db')
     sql_list = table_sql_list()
     curs = connect.cursor()
     # running a loop through tables for sql_lite

diff --git a/tests/pic_importer_test_class.py b/tests/pic_importer_test_class.py
@@ -10,6 +10,5 @@ class TestPicturaeImporter(PicturaeImporter):
     def __init__(self, date_string, paths):
         Importer.__init__(self, db_config_class=picturae_config, collection_name="Botany")
         self.init_all_vars(date_string=date_string, paths=paths)
-        self.sqlite_csv_tools = SqlLiteTools(sql_db="../tests/casbotany_lite.db")
-        self.sql_csv_tools = SqlCsvTools(config=picturae_config)
+        self.sql_csv_tools = SqlLiteTools(sql_db="../tests/casbotany_lite.db")
         self.logger = logging.getLogger("TestPicturaeImporter")
diff --git a/tests/test_sql_tools.py b/tests/test_sql_tools.py
@@ -27,8 +27,6 @@ def setUp(self):
 
         self.sql_csv_tools = self.test_picturae_importer.sql_csv_tools
 
-        self.sqlite_csv_tools = self.test_picturae_importer.sqlite_csv_tools
-
         self.specify_db_connection = self.test_picturae_importer.specify_db_connection
 
         shutil.copyfile("../tests/casbotany_lite.db", "../tests/casbotany_backup.db")
@@ -76,97 +74,52 @@ def test_create_locality(self):
         """testing create_locality function by
            recreating insert protocol for locality table, but with sqlite DB"""
 
-        localityname = f"2 miles from eastern side of Mt.Fake + {self.md5_hash}"
-
-        column_list = ['TimestampCreated',
-                       'TimestampModified',
-                       'Version',
-                       'GUID',
-                       'SrcLatLongUnit',
-                       'LocalityName',
-                       'DisciplineID',
-                       'GeographyID']
-
-        value_list = [f"{time_utils.get_pst_time_now_string()}",
-                      f"{time_utils.get_pst_time_now_string()}",
-                      1,
-                      f"{uuid4()}",
-                      1,
-                      localityname,
-                      3,
-                      256]
-
-        # assigning row ids
-        sql = self.sql_csv_tools.create_insert_statement(tab_name="locality", col_list=column_list,
-                                                         val_list=value_list)
-        # testing insert table record
-        self.sqlite_csv_tools.insert_table_record(sql=sql, logger_int=self.logger)
-        # checking whether locality id created properly
-        data_base_locality = self.sqlite_csv_tools.get_one_match(id_col="LocalityID", tab_name="locality",
-                                                              key_col="LocalityName", match=localityname,
+        self.test_picturae_importer.locality_guid = uuid4()
+        self.test_picturae_importer.locality = f"2 miles from eastern side of Mt.Fake + {self.md5_hash}"
+        self.test_picturae_importer.GeographyID = 256
+        self.test_picturae_importer.create_by_agent = 999987
+
+        self.test_picturae_importer.create_locality_record()
+
+        data_base_locality = self.sql_csv_tools.get_one_match(id_col="LocalityID", tab_name="locality",
+                                                              key_col="LocalityName",
+                                                              match=self.test_picturae_importer.locality,
                                                               match_type="string")
 
         self.assertFalse(data_base_locality is None)
 
         # checking whether geocode present
 
-        data_base_geo_code = self.sqlite_csv_tools.get_one_match(id_col="GeographyID", tab_name="locality",
-                                                              key_col="LocalityName", match=localityname,
+        data_base_geo_code = self.sql_csv_tools.get_one_match(id_col="GeographyID", tab_name="locality",
+                                                              key_col="LocalityName",
+                                                              match=self.test_picturae_importer.locality,
                                                               match_type="string")
 
-        self.assertEqual(data_base_geo_code, 256)
+        self.assertEqual(data_base_geo_code, self.test_picturae_importer.GeographyID)
 
     def test_collection_object(self):
         """test insert of collection object"""
-        table = 'collectingevent'
-
-        column_list = ['TimestampCreated',
-                       'TimestampModified',
-                       'Version',
-                       'GUID',
-                       'DisciplineID',
-                       'StationFieldNumber',
-                       'VerbatimDate',
-                       'StartDate',
-                       'EndDate',
-                       'LocalityID',
-                       'ModifiedByAgentID',
-                       'CreatedByAgentID'
-                       ]
-
-        value_list = [f'{time_utils.get_pst_time_now_string()}',
-                      f'{time_utils.get_pst_time_now_string()}',
-                      0,
-                      f'{uuid4()}',
-                      3,
-                      f'{123456}',
-                      f'{"July 9, 1953"}',
-                      f'{"07/09/1953"}',
-                      f'{"07/09/1953"}',
-                      f'{"14523"}',
-                      f'{"95152"}',
-                      f'{"95152"}'
-                      ]
-
-        # removing na values from both lists
-        value_list, column_list = remove_two_index(value_list, column_list)
-
-        # assert that len val list and column list are equivalent.
-
-        self.assertEqual(len(value_list), len(column_list))
-
-        sql = self.sql_csv_tools.create_insert_statement(tab_name=table, col_list=column_list,
-                                                         val_list=value_list)
-
-        self.sqlite_csv_tools.insert_table_record(logger_int=self.logger, sql=sql)
-
-        station_field = self.sqlite_csv_tools.get_one_match(id_col="StationFieldNumber", tab_name="collectingevent",
-                                                             key_col="StationFieldNumber",
-                                                             match=123456, match_type="integer")
+        self.test_picturae_importer.barcode = 99999998
+        self.test_picturae_importer.collection_ob_guid = uuid4()
+        self.test_picturae_importer.created_by_agent = 999987
+
+        self.test_picturae_importer.create_collection_object()
+
+        collection_ob_guid = self.sql_csv_tools.get_one_match(id_col="GUID", tab_name="collectionobject",
+                                                               key_col="CatalogNumber",
+                                                               match=self.test_picturae_importer.barcode,
+                                                               match_type="integer")
+
+        catalog_number = self.sql_csv_tools.get_one_match(id_col="CatalogNumber", tab_name="collectionobject",
+                                                          key_col="GUID",
+                                                          match=self.test_picturae_importer.collection_ob_guid,
+                                                          match_type="string")
 
         # asserting that GUID and catalog number are in the right columns
 
-        self.assertEqual('123456', station_field)
+        self.assertEqual(str(self.test_picturae_importer.collection_ob_guid), collection_ob_guid)
+
+        self.assertEqual(str(self.test_picturae_importer.barcode), catalog_number)
 
     def tearDown(self):
         """deleting instance of PicturaeImporter"""

diff --git a/tests/test_taxontree.py b/tests/test_taxontree.py
@@ -11,6 +11,8 @@
 from tests.testing_tools import TestingTools
 from image_client.picturae_import_utils import unique_ordered_list
 os.chdir("./image_client")
+
+
 class Testtaxontrees(unittest.TestCase, TestingTools):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -28,8 +30,6 @@ def setUp(self):
         shutil.copyfile("../tests/casbotany_lite.db", "../tests/casbotany_backup.db")
 
 
-
-
         data = {'CatalogNumber': ["12345", "12346", "12347", "12348"],
                 'verbatim_date': ['May 5 , 1955', 'May 20 , 1980', 'March 20th, 1925', 'April 5th, 2008'],
                 'start_date': ['5/05/1955', '5/20/1980', '3/20/1925', '4/05/2008'],
@@ -53,6 +53,7 @@ def setUp(self):
                 'Hybrid': [False, True, False, True],
                 'accepted_author': ['Dougl. ex Hook.', 'Erd.', 'Drew', 'Schleich. ex Ser']
                 }
+
         self.test_picturae_importer_lite.record_full = pd.DataFrame(data)
 
 
@@ -119,7 +120,6 @@ def test_generate_taxon_fields(self):
                                                                   match=self.test_picturae_importer_lite.parent_list[index+1],
                                                                   match_type="string")
 
-
                 self.assertEqual(parent_id, test_parent_id)
 
                 if self.test_picturae_importer_lite.is_hybrid is False and taxon == self.test_picturae_importer_lite.full_name:
@@ -146,104 +146,50 @@ def test_taxon_insert(self):
 
         for index, row in self.test_picturae_importer_lite.record_full.iterrows():
             self.test_picturae_importer_lite.populate_fields(row)
-            # self.test_picturae_importer_lite.taxon_guid = uuid4()
-            self.test_picturae_importer_lite.taxon_list = []
 
             self.test_picturae_importer_lite.populate_taxon()
 
 
             self.test_picturae_importer_lite.taxon_guid = uuid4()
 
-            self.test_picturae_importer_lite.parent_list = [self.test_picturae_importer_lite.full_name,
-                                                            self.test_picturae_importer_lite.first_intra,
-                                                            self.test_picturae_importer_lite.gen_spec,
-                                                            self.test_picturae_importer_lite.genus,
-                                                            self.test_picturae_importer_lite.family_name]
-
-            self.test_picturae_importer_lite.parent_list = unique_ordered_list(self.test_picturae_importer_lite.parent_list)
-
             self.test_picturae_importer_lite.create_taxon()
-            for index, taxon in reversed(list(enumerate(self.test_picturae_importer_lite.taxon_list))):
 
+            # pulling sample taxon to make sure columns line up
 
-                author_insert, tree_item_id, \
-                rank_end, parent_id, taxon_guid, rank_id = self.test_picturae_importer_lite.generate_taxon_fields(
-                                                           index=index, taxon=taxon)
-
-                column_list = ['TimestampCreated',
-                               'TimestampModified',
-                               'Version',
-                               'Author',
-                               'FullName',
-                               'GUID',
-                               'Source',
-                               'IsAccepted',
-                               'IsHybrid',
-                               'Name',
-                               'RankID',
-                               'TaxonTreeDefID',
-                               'ParentID',
-                               'ModifiedByAgentID',
-                               'CreatedByAgentID',
-                               'TaxonTreeDefItemID']
-
-                value_list = [f"{time_utils.get_pst_time_now_string()}",
-                              f"{time_utils.get_pst_time_now_string()}",
-                              1,
-                              author_insert,
-                              f"{taxon}",
-                              f"{taxon_guid}",
-                              "World Checklist of Vascular Plants 2023",
-                              True,
-                              self.test_picturae_importer_lite.is_hybrid,
-                              f"{rank_end}",
-                              f"{rank_id}",
-                              1,
-                              f"{parent_id}",
-                              f"{self.test_picturae_importer_lite.created_by_agent}",
-                              f"{self.test_picturae_importer_lite.created_by_agent}",
-                              f"{tree_item_id}"
-                              ]
-
-                value_list, column_list = remove_two_index(value_list, column_list)
-
-                sql = self.sql_csv_tools.create_insert_statement(tab_name="taxon",
-                                                                 col_list=column_list,
-                                                                 val_list=value_list)
-                self.sql_csv_tools.insert_table_record(logger_int=self.logger, sql=sql)
-
-
-                # pulling sample taxon to make sure columns line up
-
-
-                # checking taxname
-                pull_name_end = self.sql_csv_tools.get_one_match(id_col="Name", tab_name="taxon",
-                                                                 key_col="FullName",
-                                                                 match=taxon,
-                                                                 match_type="string")
-
-                self.assertEqual(pull_name_end, rank_end)
-
-                # checking parent id
-
-                pull_parent = self.sql_csv_tools.get_one_match(id_col="ParentID", tab_name="taxon",
-                                                               key_col="FullName",
-                                                               match=taxon,
-                                                               match_type="string")
-
-                self.assertEqual(pull_parent, parent_id)
-
-
-                # checking taxon id
-                pull_taxid = self.sql_csv_tools.get_one_match(id_col="TaxonID", tab_name="taxon",
-                                                              key_col="FullName",
-                                                              match=taxon,
-                                                              match_type="string")
-
-                self.assertFalse(pd.isna(pull_taxid))
-
-                logging.info(f"test taxon: {taxon} created")
-
+        # checking expected names
+        tax_ends = ["fakus", "fake x cool", "arnoldi", "summi", "x ambigua"]
+        full_name = ["Castilleja miniata subsp. fakus", "Castilleja miniata subsp. fakus var. fake x cool",
+                     "Rafflesia arnoldi", 'Rafflesia arnoldi var. summi', 'Salix x ambigua']
+        parent_names = ["Castilleja miniata", "Castilleja miniata subsp. fakus", "Rafflesia",
+                        "Rafflesia arnoldi", "Salix"]
+        for index, tax_end in enumerate(tax_ends):
+
+            name_pull = self.sql_csv_tools.get_one_match(id_col="Name", tab_name="taxon",
+                                                         key_col="FullName",
+                                                         match=full_name[index],
+                                                         match_type="string")
+            self.assertEqual(name_pull, tax_end)
+
+            # checking parent id
+
+            parent_id= self.sql_csv_tools.get_one_match(id_col="ParentID", tab_name="taxon",
+                                                        key_col="FullName",
+                                                        match=full_name[index],
+                                                           match_type="string")
+
+            parent_name = self.sql_csv_tools.get_one_match(id_col="FullName", tab_name="taxon",
+                                                           key_col="ParentID",
+                                                           match=parent_id,
+                                                           match_type="integer")
+            self.assertTrue(parent_names[index], parent_name)
+
+            # checking taxon id
+            pull_taxid = self.sql_csv_tools.get_one_match(id_col="TaxonID", tab_name="taxon",
+                                                          key_col="FullName",
+                                                          match=full_name[index],
+                                                          match_type="string")
+
+            self.assertFalse(pd.isna(pull_taxid))
 
     def tearDown(self):
         del self.test_picturae_importer_lite
@@ -252,6 +198,5 @@ def tearDown(self):
         os.remove("../tests/casbotany_backup.db")
 
 
-
 if __name__ == '__main__':
-    unittest.main()
+    unittest.main()
diff --git a/tests/tests_readme.txt b/tests/tests_readme.txt
@@ -5,6 +5,7 @@ testing_tools.py: tools to create fake data and generate unique ids for them to
 test classes:
     pic_csv_test_class.py: the test class of CsvCreatePicturae, with reduced init method for use in unittests.
     pic_importer_test_class.py: the test class of PicturaeImporter, with reduced init method for use in unittests.
+    sqlite_csv_utils.py: a test class of sql_csv_utils, for sqlite db compatibility
 
 tests for picturae_create_csv file:
     test_pic_dir.py : runs unittests for the functions : file_present
@@ -21,3 +22,4 @@ tests for picturae_importer file:
 
 
 
+