NOTE(review): line breaks and leading whitespace in this patch were reconstructed from the
hunk line counts; confirm context/removed-line indentation against the tree before applying.

diff --git a/.gitignore b/.gitignore
index f6d5382..22a7f25 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,4 @@
 RAMCloud
 RAMCloud-install
-.vscode
-tmp
 __pycache__/
 *.py[cod]
diff --git a/config/supervisord.conf b/config/supervisord.conf
index 66f7cf1..f21b55f 100644
--- a/config/supervisord.conf
+++ b/config/supervisord.conf
@@ -10,13 +10,15 @@ stderr_logfile_maxbytes=0
 
 [program:ramcloud-coordinator]
 command=/usr/local/bin/rc-coordinator --externalStorage %(ENV_RC_EXTERNAL_STORAGE)s --clusterName %(ENV_RC_CLUSTER_NAME)s --coordinator basic+udp:host=%(ENV_RC_IP)s,port=11111
+autorestart=false
 stdout_logfile=/dev/fd/1
 stdout_logfile_maxbytes=0
 stderr_logfile=/dev/fd/2
 stderr_logfile_maxbytes=0
 
 [program:ramcloud-server]
-command=/usr/local/bin/rc-server --externalStorage %(ENV_RC_EXTERNAL_STORAGE)s --clusterName %(ENV_RC_CLUSTER_NAME)s --local basic+udp:host=%(ENV_RC_IP)s,port=11112
+command=/usr/local/bin/rc-server --externalStorage %(ENV_RC_EXTERNAL_STORAGE)s --clusterName %(ENV_RC_CLUSTER_NAME)s --local basic+udp:host=%(ENV_RC_IP)s,port=11112 --replicas 1
+autorestart=false
 stdout_logfile=/dev/fd/1
 stdout_logfile_maxbytes=0
 stderr_logfile=/dev/fd/2
diff --git a/testing/test_cluster.py b/testing/test_cluster.py
index 5250261..80f00f7 100644
--- a/testing/test_cluster.py
+++ b/testing/test_cluster.py
@@ -1,4 +1,5 @@
 import ramcloud
+import os
 import unittest
 from pyexpect import expect
 import cluster_test_utils as ctu
@@ -38,6 +39,23 @@ def make_cluster(self, num_nodes):
                                           self.ramcloud_network)
         self.rc_client.connect(external_storage, 'main')
 
+    def simple_recovery(self, kill_command):
+        self.make_cluster(num_nodes=7)
+        self.createTestValue()
+        value = self.rc_client.read(self.table, 'testKey')
+        expect(value).equals(('testValue', 1))
+
+        # find the host corresponding to the server with our table and 'testKey',
+        # then kill its rc-server!
+        locator = self.rc_client.testing_get_service_locator(self.table, 'testKey')
+        host = ctu.get_host(locator)
+        self.node_containers[host].exec_run(kill_command)
+
+        # read the value again (without waiting for the server to recover). It
+        # should come out to the same value
+        value = self.rc_client.read(self.table, 'testKey')
+        expect(value).equals(('testValue', 1))
+
     def test_read_write(self):
         self.make_cluster(num_nodes=3)
         self.rc_client.create_table('test_table')
@@ -58,23 +76,11 @@ def test_two_writes(self):
         expect(value).equals('Good weather')
 
 
-    @unittest.skip("trying stuff out")
-    def test_01_simple_recovery(self):
-        self.make_cluster(num_nodes=3) # num_nodes=8
-        self.createTestValue()
-        value = self.rc_client.read(self.table, 'testKey')
-        expect(value).equals(('testValue', 1))
-
-        # find the host corresponding to the server with our table and 'testKey',
-        # then kill it!
-        locator = self.rc_client.testing_get_service_locator(self.table, 'testKey')
-        host = ctu.get_host(locator)
-        self.node_containers[host].kill()
+    def test_01_simple_recovery_graceful_server_down(self):
+        self.simple_recovery(kill_command = 'killall -SIGTERM rc-server')
 
-        # read the value again (without waiting for the server to recover). It
-        # should come out to the same value
-        value = self.rc_client.read(self.table, 'testKey')
-        expect(value).equals(('testValue', 1))
+    def test_01_simple_recovery_forced_server_down(self):
+        self.simple_recovery(kill_command = 'killall -SIGKILL rc-server')
 
 if __name__ == '__main__':
     unittest.main()