Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/5642 #5655

Open
wants to merge 34 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
cedbfd6
feat: add a way to query the highest available tenure
jcnelson Jan 3, 2025
8e283e6
chore: improve documentation on downloader state
jcnelson Jan 3, 2025
93fdd22
chore: report highest available tenure from downloader via NetworkResult
jcnelson Jan 3, 2025
b90d5d5
chore: pass through highest available tenure
jcnelson Jan 3, 2025
2429f50
chore: API sync
jcnelson Jan 3, 2025
2b45248
feat: add way to set IBD
jcnelson Jan 3, 2025
6979a64
feat: infer IBD from burnchain IBD and stacks IBD
jcnelson Jan 3, 2025
9331e25
fix: load IBD from globals
jcnelson Jan 3, 2025
6d39033
chore: document pox_sync_wait() better
jcnelson Jan 3, 2025
c5f7af5
Merge branch 'develop' into fix/5642
jcnelson Jan 3, 2025
b8011da
fix: immediately compute highest-available tenure since available_ten…
jcnelson Jan 4, 2025
e1feabe
chore: pass ongoing stacks tenure ID to NetworkResult
jcnelson Jan 4, 2025
fb61989
chore: pass highest available tenure from downloader to NetworkResult
jcnelson Jan 4, 2025
4102306
chore: API sync
jcnelson Jan 4, 2025
5049ee4
docs: get_headers_height() is 1-indexed
jcnelson Jan 4, 2025
4506b46
fix: the highest available tenure may be lower than the ongoing stack…
jcnelson Jan 4, 2025
6dabc04
chore: make method private again
jcnelson Jan 4, 2025
4474f2d
chore: expand follower_bootup_simple() to test is_fully_synced flag i…
jcnelson Jan 4, 2025
84658a4
Merge branch 'develop' into fix/5642
jcnelson Jan 4, 2025
bd254b7
chore: more structured logging on why a block proposal is rejected
jcnelson Jan 7, 2025
f632795
chore: log ibd in debug mode
jcnelson Jan 7, 2025
83aee92
feat: move test flags for stalling and skipping stacks block announce…
jcnelson Jan 7, 2025
ed37e67
chore: remove unused ibd variable and don't announce stacks blocks if…
jcnelson Jan 7, 2025
b53c4c6
chore: use relayer fault injection logic
jcnelson Jan 7, 2025
08d3028
chore: don't announce a stacks block if fault injection is active
jcnelson Jan 7, 2025
b5b6038
chore: log if we're still sync'ing
jcnelson Jan 7, 2025
0f42b62
chore: don't set ibd based on burnchain block download
jcnelson Jan 7, 2025
315c4d8
fix: don't allow tenure b to process until after we've mined the bloc…
jcnelson Jan 7, 2025
bdc6b82
Merge branch 'fix/forked-tenure-okay-ci-test' into fix/5642
jcnelson Jan 7, 2025
5ce7295
Merge branch 'fix/use_cargo_workspace' into fix/forked-tenure-okay-ci…
jcnelson Jan 7, 2025
77c2db1
fix: compile issue
jcnelson Jan 7, 2025
2e2e6ef
Merge branch 'fix/forked-tenure-okay-ci-test' into fix/5642
jcnelson Jan 7, 2025
321d890
chore: cargo fmt
jcnelson Jan 7, 2025
bad7699
Merge branch 'develop' into fix/5642
jcnelson Jan 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions stacks-signer/src/v0/signer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,11 @@ impl Signer {
"proposed_block_signer_sighash" => %signer_signature_hash,
"proposed_chain_length" => proposed_block.header.chain_length,
"expected_at_least" => last_block_info.block.header.chain_length + 1,
"last_block_info.block.header.consensus_hash" => %last_block_info.block.header.consensus_hash,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The best practice is not to use dots (`.`) in these keys, because systems like Grafana will replace the dots with underscores (`_`).

"proposed_block_consensus_hash" => %proposed_block_consensus_hash,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a duplicate.

"last_block_info.state" => %last_block_info.state,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as above about avoiding dots (`.`) in these keys.

"non_reorgable_block" => non_reorgable_block,
"reorg_timeout_exceeded" => reorg_timeout_exceeded
);
return Some(self.create_block_rejection(
RejectCode::SortitionViewMismatch,
Expand Down
37 changes: 33 additions & 4 deletions stackslib/src/net/download/nakamoto/download_state_machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ pub struct NakamotoDownloadStateMachine {
tenure_block_ids: HashMap<NeighborAddress, AvailableTenures>,
/// Who can serve a given tenure
pub(crate) available_tenures: HashMap<ConsensusHash, Vec<NeighborAddress>>,
/// What is the highest available tenure, if known?
pub(crate) highest_available_tenure: Option<ConsensusHash>,
/// Confirmed tenure download schedule
pub(crate) tenure_download_schedule: VecDeque<ConsensusHash>,
/// Unconfirmed tenure download schedule
Expand Down Expand Up @@ -140,6 +142,7 @@ impl NakamotoDownloadStateMachine {
state: NakamotoDownloadState::Confirmed,
tenure_block_ids: HashMap::new(),
available_tenures: HashMap::new(),
highest_available_tenure: None,
tenure_download_schedule: VecDeque::new(),
unconfirmed_tenure_download_schedule: VecDeque::new(),
tenure_downloads: NakamotoTenureDownloaderSet::new(),
Expand Down Expand Up @@ -862,6 +865,14 @@ impl NakamotoDownloadStateMachine {
self.tenure_download_schedule = schedule;
self.tenure_block_ids = tenure_block_ids;
self.available_tenures = available;

let highest_available_tenure = self.find_highest_available_tenure();
self.highest_available_tenure = highest_available_tenure;
jcnelson marked this conversation as resolved.
Show resolved Hide resolved

test_debug!(
"new highest_available_tenure: {:?}",
&self.highest_available_tenure
);
}

/// Update our tenure download state machines, given our download schedule, our peers' tenure
Expand Down Expand Up @@ -958,14 +969,14 @@ impl NakamotoDownloadStateMachine {
return false;
}

let (unconfirmed_tenure_opt, confirmed_tenure_opt) = Self::find_unconfirmed_tenure_ids(
let (confirmed_tenure_opt, unconfirmed_tenure_opt) = Self::find_unconfirmed_tenure_ids(
wanted_tenures,
prev_wanted_tenures,
available_tenures,
);
debug!(
"Check unconfirmed tenures: highest two available tenures are {:?}, {:?}",
&unconfirmed_tenure_opt, &confirmed_tenure_opt
&confirmed_tenure_opt, &unconfirmed_tenure_opt
);

// see if we need any tenures still
Expand All @@ -980,11 +991,11 @@ impl NakamotoDownloadStateMachine {
});

if !is_available_and_processed {
let is_unconfirmed = unconfirmed_tenure_opt
let is_unconfirmed = confirmed_tenure_opt
.as_ref()
.map(|ch| *ch == wt.tenure_id_consensus_hash)
.unwrap_or(false)
|| confirmed_tenure_opt
|| unconfirmed_tenure_opt
.as_ref()
.map(|ch| *ch == wt.tenure_id_consensus_hash)
.unwrap_or(false);
Expand Down Expand Up @@ -1553,6 +1564,24 @@ impl NakamotoDownloadStateMachine {
}
}

/// Find the highest available tenure ID.
/// Returns Some(consensus_hash) for the highest tenure available from at least one node.
/// Returns None if no tenures are available from any peer.
fn find_highest_available_tenure(&self) -> Option<ConsensusHash> {
let (t1, t2) = Self::find_unconfirmed_tenure_ids(
&self.wanted_tenures,
self.prev_wanted_tenures.as_ref().unwrap_or(&vec![]),
&self.available_tenures,
);
if let Some(ch) = t2 {
return Some(ch);
} else if let Some(ch) = t1 {
return Some(ch);
} else {
return None;
}
jcnelson marked this conversation as resolved.
Show resolved Hide resolved
}

/// Go and get tenures. Returns list of blocks per tenure, identified by consensus hash.
/// The blocks will be sorted by height, but may not be contiguous.
pub fn run(
Expand Down
14 changes: 11 additions & 3 deletions stackslib/src/net/download/nakamoto/tenure_downloader_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,11 @@ pub struct NakamotoTenureDownloaderSet {
/// The set of tenures that have been successfully downloaded (but possibly not yet stored or
/// processed)
pub(crate) completed_tenures: HashSet<CompletedTenure>,
/// Number of times a tenure download was attempted
/// Number of times a tenure download was attempted. This counter is incremented before the
/// downloader starts
pub(crate) attempted_tenures: HashMap<ConsensusHash, u64>,
/// Number of times a tenure download failed
/// Number of times a tenure download failed. This counter is incremented after the downloader
/// finishes in an error state.
pub(crate) attempt_failed_tenures: HashMap<ConsensusHash, u64>,
/// Peers that should be deprioritized because they're dead (maps to when they can be used
/// again)
Expand Down Expand Up @@ -451,7 +453,13 @@ impl NakamotoTenureDownloaderSet {
continue;
};
if tenure_info.processed {
// we already have this tenure
// we already have tried to download this tenure,
// but do remove it from `self.completed_tenures` in order to (1) avoid a memory
// leak, and (2) account for the chance that the end-block has changed due to a
// Bitcoin reorg. This way, a subsequent call with the same tenure in `schedule`
// will succeed in starting a downloader. Since `schedule` is derived from on-disk
// state, the only way a "completed" tenure will show up in `schedule` again is if
// it is later determined that the tenure we stored is incomplete or not canonical.
debug!("Already have processed tenure {ch}");
self.completed_tenures
.remove(&CompletedTenure::from(tenure_info));
Expand Down
14 changes: 13 additions & 1 deletion stackslib/src/net/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1469,6 +1469,8 @@ pub const DENY_MIN_BAN_DURATION: u64 = 2;
pub struct NetworkResult {
/// Stacks chain tip when we began this pass
pub stacks_tip: StacksBlockId,
/// Stacks chain tip's tenure ID when we began this pass
pub stacks_tip_tenure_id: ConsensusHash,
/// PoX ID as it was when we begin downloading blocks (set if we have downloaded new blocks)
pub download_pox_id: Option<PoxId>,
/// Network messages we received but did not handle
Expand Down Expand Up @@ -1519,15 +1521,22 @@ pub struct NetworkResult {
pub coinbase_height: u64,
/// The observed stacks tip height (different in Nakamoto from coinbase height)
pub stacks_tip_height: u64,
/// The consensus hash of the stacks tip (prefixed `rc_` for historical reasons)
/// The consensus hash of the highest complete Stacks tenure at the time the canonical
/// sortition tip was processed. Not guaranteed to be the same across all nodes for the same
/// given sortition tip.
///
/// TODO: remove this and use canonical Stacks tenure ID instead.
pub rc_consensus_hash: ConsensusHash,
/// The current StackerDB configs
pub stacker_db_configs: HashMap<QualifiedContractIdentifier, StackerDBConfig>,
/// Highest available tenure, if known
pub highest_available_tenure: Option<ConsensusHash>,
}

impl NetworkResult {
pub fn new(
stacks_tip: StacksBlockId,
stacks_tip_tenure_id: ConsensusHash,
num_state_machine_passes: u64,
num_inv_sync_passes: u64,
num_download_passes: u64,
Expand All @@ -1537,9 +1546,11 @@ impl NetworkResult {
stacks_tip_height: u64,
rc_consensus_hash: ConsensusHash,
stacker_db_configs: HashMap<QualifiedContractIdentifier, StackerDBConfig>,
highest_available_tenure: Option<ConsensusHash>,
) -> NetworkResult {
NetworkResult {
stacks_tip,
stacks_tip_tenure_id,
unhandled_messages: HashMap::new(),
download_pox_id: None,
blocks: vec![],
Expand Down Expand Up @@ -1567,6 +1578,7 @@ impl NetworkResult {
stacks_tip_height,
rc_consensus_hash,
stacker_db_configs,
highest_available_tenure,
}
}

Expand Down
7 changes: 6 additions & 1 deletion stackslib/src/net/p2p.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5219,7 +5219,7 @@ impl PeerNetwork {
poll_timeout: u64,
handler_args: &RPCHandlerArgs,
) -> Result<NetworkResult, net_error> {
debug!(">>>>>>>>>>>>>>>>>>>>>>> Begin Network Dispatch (poll for {}) >>>>>>>>>>>>>>>>>>>>>>>>>>>>", poll_timeout);
debug!(">>>>>>>>>>>>>>>>>>>>>>> Begin Network Dispatch (poll for {}, ibd={}) >>>>>>>>>>>>>>>>>>>>>>>>>>>>", poll_timeout, ibd);
let mut poll_states = match self.network {
None => {
debug!("{:?}: network not connected", &self.local_peer);
Expand Down Expand Up @@ -5258,6 +5258,7 @@ impl PeerNetwork {
);
let mut network_result = NetworkResult::new(
self.stacks_tip.block_id(),
self.stacks_tip.consensus_hash.clone(),
jcnelson marked this conversation as resolved.
Show resolved Hide resolved
self.num_state_machine_passes,
self.num_inv_sync_passes,
self.num_downloader_passes,
Expand All @@ -5267,6 +5268,10 @@ impl PeerNetwork {
self.stacks_tip.height,
self.chain_view.rc_consensus_hash.clone(),
self.get_stacker_db_configs_owned(),
self.block_downloader_nakamoto
.as_ref()
.map(|dler| dler.highest_available_tenure.clone())
jcnelson marked this conversation as resolved.
Show resolved Hide resolved
.flatten(),
);

network_result.consume_unsolicited(unsolicited_buffered_messages);
Expand Down
52 changes: 49 additions & 3 deletions stackslib/src/net/relay.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ pub mod fault_injection {
use std::path::Path;

static IGNORE_BLOCK: std::sync::Mutex<Option<(u64, String)>> = std::sync::Mutex::new(None);
pub static TEST_BLOCK_ANNOUNCE_STALL: std::sync::Mutex<Option<bool>> =
std::sync::Mutex::new(None);
pub static TEST_BLOCK_ANNOUNCE_SKIP: std::sync::Mutex<Option<bool>> =
std::sync::Mutex::new(None);

pub fn ignore_block(height: u64, working_dir: &str) -> bool {
if let Some((ignore_height, ignore_dir)) = &*IGNORE_BLOCK.lock().unwrap() {
Expand Down Expand Up @@ -102,6 +106,34 @@ pub mod fault_injection {
warn!("Fault injection: clear ignore block");
*IGNORE_BLOCK.lock().unwrap() = None;
}

/// Returns true if the test flag that stalls Stacks block announcements is set.
pub fn stacks_announce_is_blocked() -> bool {
    matches!(*TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap(), Some(true))
}

/// Returns true if the test flag that skips Stacks block announcements is set.
pub fn stacks_announce_is_skipped() -> bool {
    matches!(*TEST_BLOCK_ANNOUNCE_SKIP.lock().unwrap(), Some(true))
}

/// Returns true if either fault-injection flag (stall or skip) is currently
/// suppressing Stacks block announcements.
pub fn no_stacks_announce() -> bool {
    if stacks_announce_is_blocked() {
        return true;
    }
    stacks_announce_is_skipped()
}

/// Activate the fault-injection flag that stalls Stacks block announcements.
pub fn block_stacks_announce() {
    let mut flag = TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap();
    *flag = Some(true);
}

/// Clear the fault-injection flag that stalls Stacks block announcements.
pub fn unblock_stacks_announce() {
    let mut flag = TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap();
    *flag = None;
}

/// Activate the fault-injection flag that skips Stacks block announcements.
pub fn skip_stacks_announce() {
    let mut flag = TEST_BLOCK_ANNOUNCE_SKIP.lock().unwrap();
    *flag = Some(true);
}

/// Clear the fault-injection flag that skips Stacks block announcements.
pub fn unskip_stacks_announce() {
    let mut flag = TEST_BLOCK_ANNOUNCE_SKIP.lock().unwrap();
    *flag = None;
}
}

#[cfg(not(any(test, feature = "testing")))]
Expand All @@ -113,6 +145,14 @@ pub mod fault_injection {
pub fn set_ignore_block(_height: u64, _working_dir: &str) {}

pub fn clear_ignore_block() {}

/// Non-test build: fault injection is compiled out, so announcements are never suppressed.
pub fn no_stacks_announce() -> bool {
false
}

/// Non-test build: the skip flag does not exist, so announcements are never skipped.
pub fn stacks_announce_is_skipped() -> bool {
false
}
}

pub struct Relayer {
Expand Down Expand Up @@ -1085,7 +1125,9 @@ impl Relayer {
if accepted {
info!("{}", &accept_msg);
if let Some(coord_comms) = coord_comms {
if !coord_comms.announce_new_stacks_block() {
if !fault_injection::no_stacks_announce()
&& !coord_comms.announce_new_stacks_block()
{
return Err(chainstate_error::NetError(net_error::CoordinatorClosed));
}
}
Expand Down Expand Up @@ -2089,7 +2131,9 @@ impl Relayer {
new_confirmed_microblocks.len()
);
if let Some(coord_comms) = coord_comms {
if !coord_comms.announce_new_stacks_block() {
if !fault_injection::no_stacks_announce()
&& !coord_comms.announce_new_stacks_block()
{
return Err(net_error::CoordinatorClosed);
}
}
Expand Down Expand Up @@ -2178,7 +2222,9 @@ impl Relayer {
}
if !http_uploaded_blocks.is_empty() {
coord_comms.inspect(|comm| {
comm.announce_new_stacks_block();
if !fault_injection::no_stacks_announce() {
comm.announce_new_stacks_block();
}
});
}

Expand Down
12 changes: 12 additions & 0 deletions stackslib/src/net/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1163,6 +1163,7 @@ fn test_boot_nakamoto_peer() {
fn test_network_result_update() {
let mut network_result_1 = NetworkResult::new(
StacksBlockId([0x11; 32]),
ConsensusHash([0x01; 20]),
1,
1,
1,
Expand All @@ -1172,10 +1173,12 @@ fn test_network_result_update() {
1,
ConsensusHash([0x11; 20]),
HashMap::new(),
None,
);

let mut network_result_2 = NetworkResult::new(
StacksBlockId([0x22; 32]),
ConsensusHash([0x01; 20]),
2,
2,
2,
Expand All @@ -1185,6 +1188,7 @@ fn test_network_result_update() {
2,
ConsensusHash([0x22; 20]),
HashMap::new(),
None,
);

let nk1 = NeighborKey {
Expand Down Expand Up @@ -1613,6 +1617,7 @@ fn test_network_result_update() {
// stackerdb uploaded chunks get consolidated correctly
let mut old = NetworkResult::new(
StacksBlockId([0xaa; 32]),
ConsensusHash([0x01; 20]),
10,
10,
10,
Expand All @@ -1622,6 +1627,7 @@ fn test_network_result_update() {
10,
ConsensusHash([0xaa; 20]),
HashMap::new(),
None,
);
let mut new = old.clone();

Expand Down Expand Up @@ -1672,6 +1678,7 @@ fn test_network_result_update() {
// stackerdb pushed chunks get consolidated correctly
let mut old = NetworkResult::new(
StacksBlockId([0xaa; 32]),
ConsensusHash([0x01; 20]),
10,
10,
10,
Expand All @@ -1681,6 +1688,7 @@ fn test_network_result_update() {
10,
ConsensusHash([0xaa; 20]),
HashMap::new(),
None,
);
let mut new = old.clone();

Expand Down Expand Up @@ -1731,6 +1739,7 @@ fn test_network_result_update() {
// nakamoto blocks obtained via download, upload, or pushed get consoldated
let mut old = NetworkResult::new(
StacksBlockId([0xbb; 32]),
ConsensusHash([0x01; 20]),
11,
11,
11,
Expand All @@ -1740,6 +1749,7 @@ fn test_network_result_update() {
11,
ConsensusHash([0xbb; 20]),
HashMap::new(),
None,
);
old.nakamoto_blocks.insert(nblk1.block_id(), nblk1.clone());
old.pushed_nakamoto_blocks.insert(
Expand All @@ -1755,6 +1765,7 @@ fn test_network_result_update() {

let new = NetworkResult::new(
StacksBlockId([0xbb; 32]),
ConsensusHash([0x01; 20]),
11,
11,
11,
Expand All @@ -1764,6 +1775,7 @@ fn test_network_result_update() {
11,
ConsensusHash([0xbb; 20]),
HashMap::new(),
None,
);

let mut new_pushed = new.clone();
Expand Down
Loading
Loading