diff --git a/.github/workflows/bitcoin-tests.yml b/.github/workflows/bitcoin-tests.yml index 2f1ea4f219..b07c0dc2e4 100644 --- a/.github/workflows/bitcoin-tests.yml +++ b/.github/workflows/bitcoin-tests.yml @@ -135,6 +135,7 @@ jobs: - tests::signer::v0::block_commit_delay - tests::signer::v0::continue_after_fast_block_no_sortition - tests::signer::v0::block_validation_response_timeout + - tests::signer::v0::block_validation_check_rejection_timeout_heuristic - tests::signer::v0::block_validation_pending_table - tests::signer::v0::new_tenure_while_validating_previous_scenario - tests::signer::v0::tenure_extend_after_bad_commit diff --git a/CHANGELOG.md b/CHANGELOG.md index 1644446dd7..64f0bc2164 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE ### Changed - Miner will include other transactions in blocks with tenure extend transactions (#5760) +- Add `block_rejection_timeout_steps` to miner configuration for defining rejections-based timeouts while waiting for signers response (#5705) - Miner will not issue a tenure extend until at least half of the block budget has been spent (#5757) ### Fixed diff --git a/stackslib/src/config/mod.rs b/stackslib/src/config/mod.rs index 2e494cacef..a9a03d4861 100644 --- a/stackslib/src/config/mod.rs +++ b/stackslib/src/config/mod.rs @@ -16,7 +16,7 @@ pub mod chain_data; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::net::{Ipv4Addr, SocketAddr, ToSocketAddrs}; use std::path::PathBuf; use std::str::FromStr; @@ -2184,6 +2184,8 @@ pub struct MinerConfig { pub tenure_timeout: Duration, /// Percentage of block budget that must be used before attempting a time-based tenure extend pub tenure_extend_cost_threshold: u64, + /// Define the timeout to apply while waiting for signers responses, based on the amount of rejections + pub block_rejection_timeout_steps: HashMap, } impl Default for MinerConfig { @@ -2223,6 +2225,15 @@ impl Default for MinerConfig { tenure_extend_poll_secs: Duration::from_secs(DEFAULT_TENURE_EXTEND_POLL_SECS), tenure_timeout: Duration::from_secs(DEFAULT_TENURE_TIMEOUT_SECS), tenure_extend_cost_threshold: DEFAULT_TENURE_EXTEND_COST_THRESHOLD, + + block_rejection_timeout_steps: { + let mut rejections_timeouts_default_map = HashMap::::new(); + rejections_timeouts_default_map.insert(0, Duration::from_secs(600)); + rejections_timeouts_default_map.insert(10, Duration::from_secs(300)); + rejections_timeouts_default_map.insert(20, Duration::from_secs(150)); + rejections_timeouts_default_map.insert(30, Duration::from_secs(0)); + rejections_timeouts_default_map + }, } } } @@ -2620,6 +2631,7 @@ pub struct MinerConfigFile { pub tenure_extend_poll_secs: Option, pub tenure_timeout_secs: Option, pub tenure_extend_cost_threshold: Option, + pub block_rejection_timeout_steps: Option>, } impl MinerConfigFile { @@ -2763,6 +2775,24 @@ impl MinerConfigFile { tenure_extend_poll_secs: self.tenure_extend_poll_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_extend_poll_secs), tenure_timeout: self.tenure_timeout_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_timeout), tenure_extend_cost_threshold: self.tenure_extend_cost_threshold.unwrap_or(miner_default_config.tenure_extend_cost_threshold), + + block_rejection_timeout_steps: { + if let Some(block_rejection_timeout_items) = self.block_rejection_timeout_steps { + let mut rejection_timeout_durations = HashMap::::new(); + for (slice, seconds) in block_rejection_timeout_items.iter() { + match slice.parse::() { + Ok(slice_slot) => rejection_timeout_durations.insert(slice_slot, Duration::from_secs(*seconds)), + Err(e) => panic!("block_rejection_timeout_steps keys must be unsigned integers: {}", e) + }; + } + if !rejection_timeout_durations.contains_key(&0) { + panic!("block_rejection_timeout_steps requires a definition for the '0' key/step"); + } + rejection_timeout_durations + } else{ + miner_default_config.block_rejection_timeout_steps + } + } }) } } diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index 58d5f2da89..2138b7e767 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -13,9 +13,12 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . +use std::collections::BTreeMap; +use std::ops::Bound::Included; use std::sync::atomic::AtomicBool; use std::sync::{Arc, Mutex}; use std::thread::JoinHandle; +use std::time::{Duration, Instant}; use libsigner::v0::messages::{MinerSlotID, SignerMessage as SignerMessageV0}; use libsigner::{BlockProposal, SignerSession, StackerDBSession}; @@ -66,6 +69,8 @@ pub struct SignerCoordinator { /// Rather, this burn block is used to determine whether or not a new /// burn block has arrived since this thread started. burn_tip_at_start: ConsensusHash, + /// The timeout configuration based on the percentage of rejections + block_rejection_timeout_steps: BTreeMap, } impl SignerCoordinator { @@ -101,6 +106,14 @@ impl SignerCoordinator { let miners_contract_id = boot_code_id(MINERS_NAME, is_mainnet); let miners_session = StackerDBSession::new(&rpc_socket.to_string(), miners_contract_id); + // build a BTreeMap of the various timeout steps + let mut block_rejection_timeout_steps = BTreeMap::::new(); + for (percentage, duration) in config.miner.block_rejection_timeout_steps.iter() { + let rejections_amount = + ((f64::from(listener.total_weight) / 100.0) * f64::from(*percentage)) as u32; + block_rejection_timeout_steps.insert(rejections_amount, *duration); + } + let mut sc = Self { message_key, is_mainnet, @@ -111,6 +124,7 @@ impl SignerCoordinator { keep_running, listener_thread: None, burn_tip_at_start: burn_tip_at_start.clone(), + block_rejection_timeout_steps, }; // Spawn the signer DB listener thread @@ -293,16 +307,38 @@ impl SignerCoordinator { sortdb: &SortitionDB, counters: &Counters, ) -> Result, NakamotoNodeError> { + // the amount of current rejections (used to eventually modify the timeout) + let mut rejections: u32 = 0; + // default timeout (the 0 entry must be always present) + let mut rejections_timeout = self + .block_rejection_timeout_steps + .get(&rejections) + .ok_or_else(|| { + NakamotoNodeError::SigningCoordinatorFailure( + "Invalid rejection timeout step function definition".into(), + ) + })?; + + // this is used to track the start of the waiting cycle + let rejections_timer = Instant::now(); loop { + // At every iteration wait for the block_status. + // Exit when the amount of confirmations/rejections reaches the threshold (or until timeout) + // Based on the amount of rejections, eventually modify the timeout. let block_status = match self.stackerdb_comms.wait_for_block_status( block_signer_sighash, EVENT_RECEIVER_POLL, |status| { - status.total_weight_signed < self.weight_threshold - && status - .total_reject_weight - .saturating_add(self.weight_threshold) - <= self.total_weight + // rejections-based timeout expired? + if rejections_timer.elapsed() > *rejections_timeout { + return false; + } + // number or rejections changed? + if status.total_reject_weight != rejections { + return false; + } + // enough signatures? + return status.total_weight_signed < self.weight_threshold; }, )? { Some(status) => status, @@ -336,10 +372,44 @@ impl SignerCoordinator { return Err(NakamotoNodeError::BurnchainTipChanged); } + if rejections_timer.elapsed() > *rejections_timeout { + warn!("Timed out while waiting for responses from signers"; + "elapsed" => rejections_timer.elapsed().as_secs(), + "rejections_timeout" => rejections_timeout.as_secs(), + "rejections" => rejections, + "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) + ); + return Err(NakamotoNodeError::SigningCoordinatorFailure( + "Timed out while waiting for signatures".into(), + )); + } + continue; } }; + if rejections != block_status.total_reject_weight { + rejections = block_status.total_reject_weight; + let (rejections_step, new_rejections_timeout) = self + .block_rejection_timeout_steps + .range((Included(0), Included(rejections))) + .last() + .ok_or_else(|| { + NakamotoNodeError::SigningCoordinatorFailure( + "Invalid rejection timeout step function definition".into(), + ) + })?; + rejections_timeout = new_rejections_timeout; + info!("Number of received rejections updated, resetting timeout"; + "rejections" => rejections, + "rejections_timeout" => rejections_timeout.as_secs(), + "rejections_step" => rejections_step, + "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)); + + counters.set_miner_current_rejections_timeout_secs(rejections_timeout.as_secs()); + counters.set_miner_current_rejections(rejections); + } + if block_status .total_reject_weight .saturating_add(self.weight_threshold) @@ -357,10 +427,18 @@ impl SignerCoordinator { "block_signer_sighash" => %block_signer_sighash, ); return Ok(block_status.gathered_signatures.values().cloned().collect()); - } else { + } else if rejections_timer.elapsed() > *rejections_timeout { + warn!("Timed out while waiting for responses from signers"; + "elapsed" => rejections_timer.elapsed().as_secs(), + "rejections_timeout" => rejections_timeout.as_secs(), + "rejections" => rejections, + "rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold) + ); return Err(NakamotoNodeError::SigningCoordinatorFailure( - "Unblocked without reaching the threshold".into(), + "Timed out while waiting for signatures".into(), )); + } else { + continue; } } } diff --git a/testnet/stacks-node/src/run_loop/neon.rs b/testnet/stacks-node/src/run_loop/neon.rs index 0c64444017..299335f35f 100644 --- a/testnet/stacks-node/src/run_loop/neon.rs +++ b/testnet/stacks-node/src/run_loop/neon.rs @@ -118,6 +118,9 @@ pub struct Counters { pub naka_miner_directives: RunLoopCounter, pub naka_submitted_commit_last_stacks_tip: RunLoopCounter, + pub naka_miner_current_rejections: RunLoopCounter, + pub naka_miner_current_rejections_timeout_secs: RunLoopCounter, + #[cfg(test)] pub naka_skip_commit_op: TestFlag, } @@ -214,6 +217,14 @@ impl Counters { pub fn set_microblocks_processed(&self, value: u64) { Counters::set(&self.microblocks_processed, value) } + + pub fn set_miner_current_rejections_timeout_secs(&self, value: u64) { + Counters::set(&self.naka_miner_current_rejections_timeout_secs, value) + } + + pub fn set_miner_current_rejections(&self, value: u32) { + Counters::set(&self.naka_miner_current_rejections, u64::from(value)) + } } /// Coordinating a node running in neon mode. diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index d384065cbd..dfe5c34443 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -7895,6 +7895,114 @@ fn block_validation_response_timeout() { ); } +// Verify that the miner timeout while waiting for signers will change accordingly +// to rejections. +#[test] +#[ignore] +fn block_validation_check_rejection_timeout_heuristic() { + if env::var("BITCOIND_TEST") != Ok("1".into()) { + return; + } + + info!("------------------------- Test Setup -------------------------"); + let num_signers = 20; + let timeout = Duration::from_secs(30); + let sender_sk = Secp256k1PrivateKey::random(); + let sender_addr = tests::to_addr(&sender_sk); + let send_amt = 100; + let send_fee = 180; + + let mut signer_test: SignerTest = SignerTest::new_with_config_modifications( + num_signers, + vec![(sender_addr, send_amt + send_fee)], + |config| { + config.block_proposal_validation_timeout = timeout; + }, + |config| { + config.miner.block_rejection_timeout_steps.clear(); + config + .miner + .block_rejection_timeout_steps + .insert(0, Duration::from_secs(123)); + config + .miner + .block_rejection_timeout_steps + .insert(10, Duration::from_secs(20)); + config + .miner + .block_rejection_timeout_steps + .insert(15, Duration::from_secs(10)); + config + .miner + .block_rejection_timeout_steps + .insert(20, Duration::from_secs(99)); + }, + None, + None, + ); + + let all_signers: Vec<_> = signer_test + .signer_stacks_private_keys + .iter() + .map(StacksPublicKey::from_private) + .collect(); + + signer_test.boot_to_epoch_3(); + + // note we just use mined nakamoto_blocks as the second block is not going to be confirmed + + let mut test_rejections = |signer_split_index: usize, expected_timeout: u64| { + let blocks_before = test_observer::get_mined_nakamoto_blocks().len(); + let (ignore_signers, reject_signers) = all_signers.split_at(signer_split_index); + + info!("------------------------- Check Rejections-based timeout with {} rejections -------------------------", reject_signers.len()); + + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(reject_signers.to_vec()); + TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(ignore_signers.to_vec()); + + next_block_and( + &mut signer_test.running_nodes.btc_regtest_controller, + 30, + || Ok(test_observer::get_mined_nakamoto_blocks().len() > blocks_before), + ) + .unwrap(); + + signer_test + .wait_for_block_rejections(timeout.as_secs(), &reject_signers) + .unwrap(); + + wait_for(60, || { + Ok(signer_test + .running_nodes + .counters + .naka_miner_current_rejections + .get() + >= reject_signers.len() as u64) + }) + .unwrap(); + assert_eq!( + signer_test + .running_nodes + .counters + .naka_miner_current_rejections_timeout_secs + .get(), + expected_timeout + ); + }; + + test_rejections(19, 123); + test_rejections(18, 20); + test_rejections(17, 10); + test_rejections(16, 99); + + // reset reject/ignore + TEST_REJECT_ALL_BLOCK_PROPOSAL.set(vec![]); + TEST_IGNORE_ALL_BLOCK_PROPOSALS.set(vec![]); + + info!("------------------------- Shutdown -------------------------"); + signer_test.shutdown(); +} + /// Test scenario: /// /// - when a signer submits a block validation request and