Skip to content

Commit

Permalink
Merge pull request #11 from calacademy-research/picturae_import
Browse files Browse the repository at this point in the history
shortened unittests function in sql_csv_utils and test_sql_tools.py
  • Loading branch information
foozleface authored Oct 2, 2023
2 parents 228266b + 97bbd82 commit 3728c58
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 184 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
**/tax_dump.sql
**/run_picdb.sh
**/dummy_cleaner.py
**/picdb_config.py
Expand Down
9 changes: 3 additions & 6 deletions image_client/picturae_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,11 +578,10 @@ def create_collecting_event(self):

def create_taxon(self):
"""create_taxon: populates the taxon table iteratively by adding higher taxa first,
before lower taxa. Assigns taxa ranks and TaxonTreedefItemID.
Using parent list in order to populate parent ids, by using the parsed
rank levels of each taxon name.
before lower taxa. Assigns taxa ranks and TaxonTreedefItemID.
Using parent list in order to populate parent ids, by using the parsed
rank levels of each taxon name.
"""
# for now do not upload
self.parent_list = [self.full_name, self.first_intra, self.gen_spec, self.genus, self.family_name]
self.parent_list = unique_ordered_list(self.parent_list)
for index, taxon in reversed(list(enumerate(self.taxon_list))):
Expand Down Expand Up @@ -642,8 +641,6 @@ def create_collection_object(self):
args through create_sql_string and create_table record
in order to add new collectionobject record to database.
"""
# will new collecting event ids need to be created ?
# re-pulling collecting event id to reflect new record

self.collecting_event_id = self.sql_csv_tools.get_one_match(tab_name='collectingevent',
id_col='CollectingEventID',
Expand Down
1 change: 0 additions & 1 deletion image_client/sql_csv_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ def insert_table_record(self, logger_int, sql):
cursor = self.get_cursor()
logger_int.info(f'running query: {sql}')
logger_int.debug(sql)

try:
cursor.execute(sql)
except Exception as e:
Expand Down
4 changes: 2 additions & 2 deletions tests/casbotany_lite_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,9 +388,9 @@ def table_sql_list():


def casbotany_lite_creator():
"""casbotqny_lite_creator: casbotany_lite_creator: creates the
"""casbotany_lite_creator: casbotany_lite_creator: creates the
sqllite tables contained in the sqllite DDL list"""
connect = sqlite3.connect('tests/casbotany_lite.db')
connect = sqlite3.connect('casbotany_lite.db')
sql_list = table_sql_list()
curs = connect.cursor()
# running a loop through tables for sql_lite
Expand Down
3 changes: 1 addition & 2 deletions tests/pic_importer_test_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,5 @@ class TestPicturaeImporter(PicturaeImporter):
def __init__(self, date_string, paths):
Importer.__init__(self, db_config_class=picturae_config, collection_name="Botany")
self.init_all_vars(date_string=date_string, paths=paths)
self.sqlite_csv_tools = SqlLiteTools(sql_db="../tests/casbotany_lite.db")
self.sql_csv_tools = SqlCsvTools(config=picturae_config)
self.sql_csv_tools = SqlLiteTools(sql_db="../tests/casbotany_lite.db")
self.logger = logging.getLogger("TestPicturaeImporter")
111 changes: 32 additions & 79 deletions tests/test_sql_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ def setUp(self):

self.sql_csv_tools = self.test_picturae_importer.sql_csv_tools

self.sqlite_csv_tools = self.test_picturae_importer.sqlite_csv_tools

self.specify_db_connection = self.test_picturae_importer.specify_db_connection

shutil.copyfile("../tests/casbotany_lite.db", "../tests/casbotany_backup.db")
Expand Down Expand Up @@ -76,97 +74,52 @@ def test_create_locality(self):
"""testing create_locality function by
recreating insert protocol for locality table, but with sqlite DB"""

localityname = f"2 miles from eastern side of Mt.Fake + {self.md5_hash}"

column_list = ['TimestampCreated',
'TimestampModified',
'Version',
'GUID',
'SrcLatLongUnit',
'LocalityName',
'DisciplineID',
'GeographyID']

value_list = [f"{time_utils.get_pst_time_now_string()}",
f"{time_utils.get_pst_time_now_string()}",
1,
f"{uuid4()}",
1,
localityname,
3,
256]

# assigning row ids
sql = self.sql_csv_tools.create_insert_statement(tab_name="locality", col_list=column_list,
val_list=value_list)
# testing insert table record
self.sqlite_csv_tools.insert_table_record(sql=sql, logger_int=self.logger)
# checking whether locality id created properly
data_base_locality = self.sqlite_csv_tools.get_one_match(id_col="LocalityID", tab_name="locality",
key_col="LocalityName", match=localityname,
self.test_picturae_importer.locality_guid = uuid4()
self.test_picturae_importer.locality = f"2 miles from eastern side of Mt.Fake + {self.md5_hash}"
self.test_picturae_importer.GeographyID = 256
self.test_picturae_importer.create_by_agent = 999987

self.test_picturae_importer.create_locality_record()

data_base_locality = self.sql_csv_tools.get_one_match(id_col="LocalityID", tab_name="locality",
key_col="LocalityName",
match=self.test_picturae_importer.locality,
match_type="string")

self.assertFalse(data_base_locality is None)

# checking whether geocode present

data_base_geo_code = self.sqlite_csv_tools.get_one_match(id_col="GeographyID", tab_name="locality",
key_col="LocalityName", match=localityname,
data_base_geo_code = self.sql_csv_tools.get_one_match(id_col="GeographyID", tab_name="locality",
key_col="LocalityName",
match=self.test_picturae_importer.locality,
match_type="string")

self.assertEqual(data_base_geo_code, 256)
self.assertEqual(data_base_geo_code, self.test_picturae_importer.GeographyID)

def test_collection_object(self):
"""test insert of collection object"""
table = 'collectingevent'

column_list = ['TimestampCreated',
'TimestampModified',
'Version',
'GUID',
'DisciplineID',
'StationFieldNumber',
'VerbatimDate',
'StartDate',
'EndDate',
'LocalityID',
'ModifiedByAgentID',
'CreatedByAgentID'
]

value_list = [f'{time_utils.get_pst_time_now_string()}',
f'{time_utils.get_pst_time_now_string()}',
0,
f'{uuid4()}',
3,
f'{123456}',
f'{"July 9, 1953"}',
f'{"07/09/1953"}',
f'{"07/09/1953"}',
f'{"14523"}',
f'{"95152"}',
f'{"95152"}'
]

# removing na values from both lists
value_list, column_list = remove_two_index(value_list, column_list)

# assert that len val list and column list are equivalent.

self.assertEqual(len(value_list), len(column_list))

sql = self.sql_csv_tools.create_insert_statement(tab_name=table, col_list=column_list,
val_list=value_list)

self.sqlite_csv_tools.insert_table_record(logger_int=self.logger, sql=sql)

station_field = self.sqlite_csv_tools.get_one_match(id_col="StationFieldNumber", tab_name="collectingevent",
key_col="StationFieldNumber",
match=123456, match_type="integer")
self.test_picturae_importer.barcode = 99999998
self.test_picturae_importer.collection_ob_guid = uuid4()
self.test_picturae_importer.created_by_agent = 999987

self.test_picturae_importer.create_collection_object()

collection_ob_guid = self.sql_csv_tools.get_one_match(id_col="GUID", tab_name="collectionobject",
key_col="CatalogNumber",
match=self.test_picturae_importer.barcode,
match_type="integer")

catalog_number = self.sql_csv_tools.get_one_match(id_col="CatalogNumber", tab_name="collectionobject",
key_col="GUID",
match=self.test_picturae_importer.collection_ob_guid,
match_type="string")

# asserting that station field number is in right column

self.assertEqual('123456', station_field)
self.assertEqual(str(self.test_picturae_importer.collection_ob_guid), collection_ob_guid)

self.assertEqual(str(self.test_picturae_importer.barcode), catalog_number)

def tearDown(self):
"""deleting instance of PicturaeImporter"""
Expand Down
133 changes: 39 additions & 94 deletions tests/test_taxontree.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from tests.testing_tools import TestingTools
from image_client.picturae_import_utils import unique_ordered_list
os.chdir("./image_client")


class Testtaxontrees(unittest.TestCase, TestingTools):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand All @@ -28,8 +30,6 @@ def setUp(self):
shutil.copyfile("../tests/casbotany_lite.db", "../tests/casbotany_backup.db")




data = {'CatalogNumber': ["12345", "12346", "12347", "12348"],
'verbatim_date': ['May 5 , 1955', 'May 20 , 1980', 'March 20th, 1925', 'April 5th, 2008'],
'start_date': ['5/05/1955', '5/20/1980', '3/20/1925', '4/05/2008'],
Expand All @@ -53,6 +53,7 @@ def setUp(self):
'Hybrid': [False, True, False, True],
'accepted_author': ['Dougl. ex Hook.', 'Erd.', 'Drew', 'Schleich. ex Ser']
}

self.test_picturae_importer_lite.record_full = pd.DataFrame(data)


Expand Down Expand Up @@ -119,7 +120,6 @@ def test_generate_taxon_fields(self):
match=self.test_picturae_importer_lite.parent_list[index+1],
match_type="string")


self.assertEqual(parent_id, test_parent_id)

if self.test_picturae_importer_lite.is_hybrid is False and taxon == self.test_picturae_importer_lite.full_name:
Expand All @@ -146,104 +146,50 @@ def test_taxon_insert(self):

for index, row in self.test_picturae_importer_lite.record_full.iterrows():
self.test_picturae_importer_lite.populate_fields(row)
# self.test_picturae_importer_lite.taxon_guid = uuid4()
self.test_picturae_importer_lite.taxon_list = []

self.test_picturae_importer_lite.populate_taxon()


self.test_picturae_importer_lite.taxon_guid = uuid4()

self.test_picturae_importer_lite.parent_list = [self.test_picturae_importer_lite.full_name,
self.test_picturae_importer_lite.first_intra,
self.test_picturae_importer_lite.gen_spec,
self.test_picturae_importer_lite.genus,
self.test_picturae_importer_lite.family_name]

self.test_picturae_importer_lite.parent_list = unique_ordered_list(self.test_picturae_importer_lite.parent_list)

self.test_picturae_importer_lite.create_taxon()
for index, taxon in reversed(list(enumerate(self.test_picturae_importer_lite.taxon_list))):

# pulling sample taxon to make sure columns line up

author_insert, tree_item_id, \
rank_end, parent_id, taxon_guid, rank_id = self.test_picturae_importer_lite.generate_taxon_fields(
index=index, taxon=taxon)

column_list = ['TimestampCreated',
'TimestampModified',
'Version',
'Author',
'FullName',
'GUID',
'Source',
'IsAccepted',
'IsHybrid',
'Name',
'RankID',
'TaxonTreeDefID',
'ParentID',
'ModifiedByAgentID',
'CreatedByAgentID',
'TaxonTreeDefItemID']

value_list = [f"{time_utils.get_pst_time_now_string()}",
f"{time_utils.get_pst_time_now_string()}",
1,
author_insert,
f"{taxon}",
f"{taxon_guid}",
"World Checklist of Vascular Plants 2023",
True,
self.test_picturae_importer_lite.is_hybrid,
f"{rank_end}",
f"{rank_id}",
1,
f"{parent_id}",
f"{self.test_picturae_importer_lite.created_by_agent}",
f"{self.test_picturae_importer_lite.created_by_agent}",
f"{tree_item_id}"
]

value_list, column_list = remove_two_index(value_list, column_list)

sql = self.sql_csv_tools.create_insert_statement(tab_name="taxon",
col_list=column_list,
val_list=value_list)
self.sql_csv_tools.insert_table_record(logger_int=self.logger, sql=sql)


# pulling sample taxon to make sure columns line up


# checking taxname
pull_name_end = self.sql_csv_tools.get_one_match(id_col="Name", tab_name="taxon",
key_col="FullName",
match=taxon,
match_type="string")

self.assertEqual(pull_name_end, rank_end)

# checking parent id

pull_parent = self.sql_csv_tools.get_one_match(id_col="ParentID", tab_name="taxon",
key_col="FullName",
match=taxon,
match_type="string")

self.assertEqual(pull_parent, parent_id)


# checking taxon id
pull_taxid = self.sql_csv_tools.get_one_match(id_col="TaxonID", tab_name="taxon",
key_col="FullName",
match=taxon,
match_type="string")

self.assertFalse(pd.isna(pull_taxid))

logging.info(f"test taxon: {taxon} created")

# checking expected names
tax_ends = ["fakus", "fake x cool", "arnoldi", "summi", "x ambigua"]
full_name = ["Castilleja miniata subsp. fakus", "Castilleja miniata subsp. fakus var. fake x cool",
"Rafflesia arnoldi", 'Rafflesia arnoldi var. summi', 'Salix x ambigua']
parent_names = ["Castilleja miniata", "Castilleja miniata subsp. fakus", "Rafflesia",
"Rafflesia arnoldi", "Salix"]
for index, tax_end in enumerate(tax_ends):

name_pull = self.sql_csv_tools.get_one_match(id_col="Name", tab_name="taxon",
key_col="FullName",
match=full_name[index],
match_type="string")
self.assertEqual(name_pull, tax_end)

# checking parent id

parent_id= self.sql_csv_tools.get_one_match(id_col="ParentID", tab_name="taxon",
key_col="FullName",
match=full_name[index],
match_type="string")

parent_name = self.sql_csv_tools.get_one_match(id_col="FullName", tab_name="taxon",
key_col="ParentID",
match=parent_id,
match_type="integer")
self.assertTrue(parent_names[index], parent_name)

# checking taxon id
pull_taxid = self.sql_csv_tools.get_one_match(id_col="TaxonID", tab_name="taxon",
key_col="FullName",
match=full_name[index],
match_type="string")

self.assertFalse(pd.isna(pull_taxid))

def tearDown(self):
del self.test_picturae_importer_lite
Expand All @@ -252,6 +198,5 @@ def tearDown(self):
os.remove("../tests/casbotany_backup.db")



if __name__ == '__main__':
unittest.main()
unittest.main()
2 changes: 2 additions & 0 deletions tests/tests_readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ testing_tools.py: tools to create fake data and generate unique ids for them to
test classes:
pic_csv_test_class.py: the test class of CsvCreatePicturae, with a reduced init method for use in unit tests.
pic_importer_test_class.py: the test class of PicturaeImporter, with a reduced init method for use in unit tests.
sqlite_csv_utils.py: a test class of sql_csv_utils, for SQLite database compatibility.

tests for picturae_create_csv file:
test_pic_dir.py : runs unittests for the functions : file_present
Expand All @@ -21,3 +22,4 @@ tests for picturae_importer file:




0 comments on commit 3728c58

Please sign in to comment.