diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/run_all_tests.py b/tests/run_all_tests.py
new file mode 100644
index 0000000..05ec8eb
--- /dev/null
+++ b/tests/run_all_tests.py
@@ -0,0 +1,22 @@
+import unittest
+import sys
+import os
+
+def run_all_tests():
+    """Discover and run all tests in the tests directory"""
+    # Get the directory containing this file
+    test_dir = os.path.dirname(os.path.abspath(__file__))
+
+    # Discover and run tests
+    loader = unittest.TestLoader()
+    suite = loader.discover(test_dir, pattern='test_*.py')
+
+    # Run tests with verbosity
+    runner = unittest.TextTestRunner(verbosity=2)
+    result = runner.run(suite)
+
+    # Return 0 if all tests passed, 1 if any failed
+    return 0 if result.wasSuccessful() else 1
+
+if __name__ == '__main__':
+    sys.exit(run_all_tests())
\ No newline at end of file
diff --git a/tests/test_categories.csv b/tests/test_data/test_categories.csv
similarity index 100%
rename from tests/test_categories.csv
rename to tests/test_data/test_categories.csv
diff --git a/tests/test_messages.csv b/tests/test_data/test_messages.csv
similarity index 100%
rename from tests/test_messages.csv
rename to tests/test_data/test_messages.csv
diff --git a/tests/test_data_processing.py b/tests/test_data_processing.py
deleted file mode 100644
index b0a0958..0000000
--- a/tests/test_data_processing.py
+++ /dev/null
@@ -1,211 +0,0 @@
-import sys
-import unittest
-import pandas as pd
-import os
-from sqlalchemy import create_engine
-from data import load_data
-from test_train_classifier import chicken_duties
-from data.process_data import load_data
-from data.process_data import save_data
-
-
-class TestProcessData(unittest.TestCase):
-
-    def setUp(self):
-        """Set up sample data for testing."""
-        # Sample messages and categories data
-        self.messages_data = {
-            'id': [1, 2, 3],
-            'message': ['Hello', 'Help', 'Goodbye'],
-            'original': ['Hola', 'Ayuda', 'Adiós'],
-            'genre': ['social', 'news', 'direct']
-        }
-        self.categories_data = {
-            'id': [1, 2, 3],
-            'categories': ['related-1;request-0;offer-0',
-                           'related-1;request-1;offer-0',
-                           'related-0;request-0;offer-1']
-        }
-
-        # Create DataFrames from sample data
-        self.messages_df = pd.DataFrame(self.messages_data)
-        self.categories_df = pd.DataFrame(self.categories_data)
-
-    def test_load_data(self):
-        """Test loading and merging data."""
-        # Create temporary CSV files for messages and categories
-        self.messages_df.to_csv('test_messages.csv', index=False)
-        self.categories_df.to_csv('test_categories.csv', index=False)
-
-        # Load and merge data
-        df = load_data('test_messages.csv', 'test_categories.csv')
-
-        # Test the shape and contents of the combined dataframe
-        self.assertEqual(df.shape[0], 3) # Check the number of rows
-        self.assertIn('message', df.columns) # Check for specific column
-        self.assertIn('categories', df.columns) # Check for specific column
-
-        # Clean up temporary files
-        os.remove('test_messages.csv')
-        os.remove('test_categories.csv')
-
-    def test_clean_data(self):
-        """Test cleaning of data."""
-        # Merge the messages and categories first
-        df = pd.merge(self.messages_df, self.categories_df, on="id")
-
-        # Clean the merged dataframe
-        cleaned_df = clean_data(df)
-
-        # Test the new columns are binary and correctly converted
-        self.assertEqual(cleaned_df.shape[0], 3) # Check the number of rows
-        self.assertIn('related', cleaned_df.columns) # Check for expanded categories
-        self.assertTrue(all(cleaned_df['related'].isin([0, 1]))) # Ensure binary conversion
-
-        # Test that there are no duplicates
-        self.assertFalse(cleaned_df.duplicated().any())
-
-    def test_save_data(self):
-        """Test saving data to a SQLite database."""
-        # Sample cleaned dataframe
-        cleaned_df = clean_data(pd.merge(self.messages_df, self.categories_df, on="id"))
-
-        # Save to an SQLite database
-        save_data(cleaned_df, 'test_database.db')
-
-        # Test that the database and table exist
-        engine = create_engine("sqlite:///test_database.db")
-        table_names = engine.table_names()
-        self.assertIn('cleandata', table_names) # Check that table was created
-
-        # Clean up the test database file
-        os.remove('test_database.db')
-
-if __name__ == '__main__':
-    unittest.main()
-
-
-class TestDataProcessing(unittest.TestCase):
-
-    def setUp(self):
-        # Sample CSV data for testing
-        self.messages_data = """id,message
-        1,Hello World
-        2,Machine Learning is fun
-        3,Unit testing is important
-        """
-        self.categories_data = """id,categories
-        1,related;request;1;0;0
-        2,related;offer;0;1;0
-        3,request;related;0;0;1
-        """
-        # Create DataFrames
-        self.messages_filepath = 'messages_test.csv'
-        self.categories_filepath = 'categories_test.csv'
-        self.df_messages = pd.read_csv(StringIO(self.messages_data))
-        self.df_categories = pd.read_csv(StringIO(self.categories_data))
-
-        # Save to CSV for testing
-        self.df_messages.to_csv(self.messages_filepath, index=False)
-        self.df_categories.to_csv(self.categories_filepath, index=False)
-
-    def test_load_data(self):
-        """Test loading data from CSV files."""
-        df = load_data(self.messages_filepath, self.categories_filepath)
-        self.assertEqual(df.shape[0], 3) # Check if 3 rows are loaded
-        self.assertIn('message', df.columns) # Check if 'message' column exists
-        self.assertIn('categories', df.columns) # Check if 'categories' column exists
-
-    def test_clean_data(self):
-        """Test cleaning of data."""
-        df = load_data(self.messages_filepath, self.categories_filepath)
-        cleaned_df = clean_data(df)
-        self.assertIn('related', cleaned_df.columns) # Check if the category 'related' is present
-        self.assertIn('request', cleaned_df.columns) # Check if the category 'request' is present
-        self.assertTrue((cleaned_df['related'].isin([0, 1])).all()) # Check binary values in 'related' column
-
-    def test_save_data(self):
-        """Test saving data to SQLite database."""
-        df = load_data(self.messages_filepath, self.categories_filepath)
-        cleaned_df = clean_data(df)
-        database_path = 'test_database.db'
-
-        # Use a context manager to avoid leaving the database open
-        with self.assertRaises(Exception):
-            save_data(cleaned_df, database_path)
-
-    def tearDown(self):
-        import os
-        os.remove(self.messages_filepath) # Remove test CSV files
-        os.remove(self.categories_filepath)
-
-if __name__ == '__main__':
-    unittest.main()
-
-
-print(tokenize('There.'))
-print(WordNetLemmatizer().lemmatize('there'))
-'there' in set(stopwords.words("english"))
-
-
-def display_dataset(X_train, y_train, X_test=None, y_test=None):
-    """
-    """
-    print("unique Y values: ", np.unique(Y))
-    print("training set, X: ", X_train.shape)
-    if X_test is not None:
-        print("test set, X: ",X_test.shape)
-    print("training set, Y: ",y_train.shape)
-    if y_test is not None:
-        print("test set, Y: ",y_test.shape)
-
-def data_type_check(X1, X2):
-    """
-    """
-    # check data types of
-    print("X1 shape: ", X1.shape)
-    print("X2 shape: ", X2.shape)
-    print("X1 Type: ", type(X1))
-    print("X2 Type: ",type(X2))
-
-
-text = 'What can I do?'
-tokens = tokenize(text)
-print(tokens)
-for token in word_tokenize(text.lower()):
-    print(WordNetLemmatizer().lemmatize(token))
-    print(f'{token}, {token in set(stopwords.words("english"))}')
-
-
-
-# print(accuracy(y_test, y_pred))
-
-# # Now you can generate the classification report
-# for col_index in range(0,y_test.shape[1]):
-#     report = classification_report(y_test[:,col_index], y_pred[:, col_index], zero_division=0)
-#     print(classes[col_index])
-#     print(report)
-
-
-
-
-
-
-
-#Testing Tokenize Function - Use this to test the output of the tokenize function.
-
-text1 = "Barclaysjbki CEO stresses the importance of regulatory and cultural reform in financial services at Brussels conference https://www.google.com"
-print(f'input text: "{text1}"\n')
-print(f"text tokens: {tokenize(text1)} \n")
-text2 = "The No. 8 Northeast Gale or storm signal was issued at 5.55pm yesterday (September 14) and was replaced by Southeast gale and storm signal at 12.35am today (September 15)."
-print(f'input text: "{text2}" \n')
-print(f"text tokens: {tokenize(text2)} \n")
-sentence_list = sent_tokenize(text2)
-print(f"sentences: {sentence_list} \n")
-print("testing sentence tokenization...")
-for text in sentence_list:
-    print(f'\ntext: "{text}"')
-    print(f"\ntext tokens: {tokenize(text)}")
-
-
- 
\ No newline at end of file
diff --git a/tests/test_process_data.py b/tests/test_process_data.py
new file mode 100644
index 0000000..f4a7dda
--- /dev/null
+++ b/tests/test_process_data.py
@@ -0,0 +1,159 @@
+# tests/test_process_data.py
+import unittest
+import sys
+import os
+import pandas as pd
+from sqlalchemy import create_engine
+
+# Add the parent directory to the Python path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+from data.process_data import load_data, clean_data, save_data
+
+class TestProcessData(unittest.TestCase):
+    """Test cases for disaster response data processing functions"""
+
+    @classmethod
+    def setUpClass(cls):
+        """Set up test fixtures including sample CSV files"""
+        # Define paths for test files
+        cls.test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
+        cls.messages_filepath = os.path.join(cls.test_data_dir, 'test_messages.csv')
+        cls.categories_filepath = os.path.join(cls.test_data_dir, 'test_categories.csv')
+        cls.database_filepath = os.path.join(cls.test_data_dir, 'test_db.db')
+
+        # Create test_data directory if it doesn't exist
+        os.makedirs(cls.test_data_dir, exist_ok=True)
+
+    def setUp(self):
+        """Create sample CSV files before each test"""
+        # Create a small sample messages.csv file
+        messages_data = """id,message,original,genre
+1,"Help! We need water","Help! We need water",direct
+2,"Need food and shelter","Need food and shelter",direct
+3,"No electricity","No electricity",news
+4,"Medical assistance needed","Medical assistance needed",news"""
+
+        # Create a small sample categories.csv file; every row carries the same
+        # full category set so clean_data can split and expand it consistently
+        categories_data = """id,categories
+1,"related-1;request-1;aid_related-1;medical_help-0;medical_assistance-0;water-1;food-0;shelter-0;infrastructure_related-0;electricity-0"
+2,"related-1;request-1;aid_related-1;medical_help-0;medical_assistance-0;water-0;food-1;shelter-1;infrastructure_related-0;electricity-0"
+3,"related-1;request-0;aid_related-1;medical_help-0;medical_assistance-0;water-0;food-0;shelter-0;infrastructure_related-1;electricity-1"
+4,"related-1;request-1;aid_related-1;medical_help-1;medical_assistance-1;water-0;food-0;shelter-0;infrastructure_related-0;electricity-0" """
+
+        # Write test CSV files
+        with open(self.messages_filepath, 'w') as f:
+            f.write(messages_data)
+
+        with open(self.categories_filepath, 'w') as f:
+            f.write(categories_data)
+
+    def test_load_data(self):
+        """Test if load_data correctly loads and merges the datasets"""
+        df = load_data(self.messages_filepath, self.categories_filepath)
+
+        # Test the loaded data structure
+        self.assertEqual(len(df), 4) # Should have 4 rows
+        self.assertTrue(all(col in df.columns
+                            for col in ['id', 'message', 'original', 'genre', 'categories']))
+
+        # Test data content
+        self.assertEqual(df.iloc[0]['message'], 'Help! We need water')
+        self.assertEqual(df.iloc[0]['genre'], 'direct')
+
+    def test_clean_data(self):
+        """Test if clean_data correctly processes the DataFrame"""
+        # First load the data
+        df = load_data(self.messages_filepath, self.categories_filepath)
+        cleaned_df = clean_data(df)
+
+        # Test binary values in category columns
+        category_columns = [col for col in cleaned_df.columns
+                            if col not in ['id', 'message', 'original', 'genre']]
+
+        for col in category_columns:
+            unique_vals = cleaned_df[col].unique()
+            self.assertTrue(all(val in [0.0, 1.0] for val in unique_vals),
+                            f"Column {col} contains non-binary values: {unique_vals}")
+
+        # Test no duplicates
+        self.assertEqual(len(cleaned_df), len(cleaned_df.drop_duplicates()))
+
+        # Test expected transformations
+        self.assertTrue('water' in cleaned_df.columns)
+        self.assertEqual(cleaned_df.iloc[0]['water'], 1.0)
+
+    def test_save_data(self):
+        """Test if save_data correctly saves the DataFrame to SQLite database"""
+        # Load and clean the data
+        df = load_data(self.messages_filepath, self.categories_filepath)
+        cleaned_df = clean_data(df)
+
+        # Save to database
+        save_data(cleaned_df, self.database_filepath)
+
+        # Verify data was saved correctly
+        engine = create_engine(f'sqlite:///{self.database_filepath}')
+        saved_df = pd.read_sql_table('cleandata', engine)
+
+        self.assertEqual(len(saved_df), len(cleaned_df))
+        self.assertTrue(all(col in saved_df.columns for col in cleaned_df.columns))
+
+    def tearDown(self):
+        """Clean up test files after each test"""
+        # Remove test files
+        for filepath in [self.messages_filepath, self.categories_filepath, self.database_filepath]:
+            try:
+                if os.path.exists(filepath):
+                    os.remove(filepath)
+            except PermissionError:
+                pass # Handle Windows file lock issues
+
+    @classmethod
+    def tearDownClass(cls):
+        """Clean up test directory after all tests"""
+        try:
+            os.rmdir(cls.test_data_dir)
+        except (OSError, PermissionError):
+            pass # Directory might not be empty or might be locked
+
+class TestTextProcessing(unittest.TestCase):
+    """Test cases for text processing functions"""
+
+    def setUp(self):
+        """Load sample messages from test CSV"""
+        messages_filepath = os.path.join(
+            os.path.dirname(__file__),
+            'test_data/test_messages.csv'
+        )
+        if os.path.exists(messages_filepath):
+            self.test_df = pd.read_csv(messages_filepath)
+        else:
+            self.test_df = pd.DataFrame({
+                'message': [
+                    'Help! Need water.',
+                    'We need medical supplies and food immediately!!',
+                    'Need 100 blankets at shelter 5'
+                ]
+            })
+
+    def test_tokenize(self):
+        """Test if tokenize correctly processes text"""
+        from data.process_data import tokenize
+
+        # Test first message
+        tokens = tokenize(self.test_df['message'].iloc[0])
+        self.assertTrue(all(isinstance(token, str) for token in tokens))
+        self.assertTrue(len(tokens) > 0)
+
+    def test_tokenize_empty(self):
+        """Test tokenize with empty input"""
+        from data.process_data import tokenize
+        self.assertEqual(tokenize(''), [])
+
+    def tearDown(self):
+        """Clean up test resources"""
+        # Clear the test DataFrame
+        self.test_df = None
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
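
Note on running the suite: since this change adds tests/__init__.py, the tests directory becomes a package, so the tests can be invoked either through the new helper script or with plain unittest discovery. A minimal usage sketch (assuming the commands are run from the repository root, so that the data package is importable):

    python tests/run_all_tests.py
    python -m unittest discover tests -p "test_*.py" -v

Both forms use the same test_*.py pattern that run_all_tests() passes to TestLoader.discover.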