Skip to content

Commit

Permalink
Changing error messages in case of node disconnection; cleaning up un…
Browse files Browse the repository at this point in the history
…used properties; removing `request_limit` and the logic attached to that since we don't actually handle multiple in-flight requests; Removing the possibility to define "infinite" as a valid retry amount in node client connector since it can lead to deadlocks
  • Loading branch information
zajko committed Jan 28, 2025
1 parent 7a22f99 commit c89da6a
Show file tree
Hide file tree
Showing 14 changed files with 169 additions and 321 deletions.
24 changes: 12 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ address = '0.0.0.0:28101'
max_message_size_bytes = 4_194_304
request_limit = 3
request_buffer_size = 16
message_timeout_secs = 30
client_access_timeout_secs = 2
message_timeout_secs = 10
client_access_timeout_secs = 10

[rpc_server.speculative_exec_server]
enable_server = true
Expand Down
39 changes: 37 additions & 2 deletions metrics/src/rpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ const RESPONSE_SIZE_BUCKETS: &[f64; 8] = &[
5e+2_f64, 1e+3_f64, 2e+3_f64, 5e+3_f64, 5e+4_f64, 5e+5_f64, 5e+6_f64, 5e+7_f64,
];

const RESPONSE_TIME_MS_BUCKETS: &[f64; 8] = &[
1_f64, 5_f64, 10_f64, 30_f64, 50_f64, 100_f64, 200_f64, 300_f64,
const RESPONSE_TIME_MS_BUCKETS: &[f64; 9] = &[
1_f64, 5_f64, 10_f64, 30_f64, 50_f64, 100_f64, 300_f64, 1000_f64, 3000_f64,
];

static ENDPOINT_CALLS: Lazy<IntCounterVec> = Lazy::new(|| {
Expand All @@ -24,6 +24,21 @@ static ENDPOINT_CALLS: Lazy<IntCounterVec> = Lazy::new(|| {
counter
});

/// Counters of requests that failed due to an internal timeout, keyed by the `timer` label.
///
/// The metric is built and registered in `REGISTRY` the first time this static is
/// dereferenced.
///
/// # Panics
///
/// Panics on first access if the metric cannot be created or cannot be registered.
static TIMEOUT_COUNTERS: Lazy<IntCounterVec> = Lazy::new(|| {
    let counter = IntCounterVec::new(
        Opts::new(
            "rpc_server_timeout_counts",
            "Counters for how many of the requests failed due to internal timeout",
        ),
        &["timer"],
    )
    // Name the failing metric in the panic message rather than using a bare `unwrap`.
    .expect("rpc_server_timeout_counts metric can't be created");
    REGISTRY
        .register(Box::new(counter.clone()))
        .expect("cannot register metric");
    counter
});

static RESPONSE_TIMES_MS: Lazy<HistogramVec> = Lazy::new(|| {
let histogram = HistogramVec::new(
HistogramOpts {
Expand Down Expand Up @@ -56,6 +71,18 @@ static RECONNECT_TIMES_MS: Lazy<Histogram> = Lazy::new(|| {
histogram
});

/// Gauge tracking how many mismatched-id events were observed in responses from the
/// binary port.
///
/// The metric is built and registered in `REGISTRY` the first time this static is
/// dereferenced.
///
/// # Panics
///
/// Panics on first access if the metric cannot be created or cannot be registered.
static MISMATCHED_IDS: Lazy<IntGauge> = Lazy::new(|| {
    let gauge = IntGauge::new(
        "rpc_server_mismatched_ids",
        // Help text is exported with the metric, so the spelling matters to operators.
        "Number of mismatched id events observed in responses from binary port",
    )
    .expect("rpc_server_mismatched_ids metric can't be created");
    REGISTRY
        .register(Box::new(gauge.clone()))
        .expect("cannot register metric");
    gauge
});

static DISCONNECT_EVENTS: Lazy<IntGauge> = Lazy::new(|| {
let counter = IntGauge::new(
"rpc_server_disconnects",
Expand Down Expand Up @@ -108,3 +135,11 @@ pub fn register_request_size(method: &str, payload_size: f64) {
.with_label_values(&[method])
.observe(payload_size);
}

/// Increments the timeout counter whose `timer` label equals `timer_name`.
pub fn register_timeout(timer_name: &str) {
    let labelled_counter = TIMEOUT_COUNTERS.with_label_values(&[timer_name]);
    labelled_counter.inc();
}

/// Records one mismatched-id event by incrementing `MISMATCHED_IDS`.
pub fn register_mismatched_id() {
    let mismatched_ids = &*MISMATCHED_IDS;
    mismatched_ids.inc();
}
4 changes: 1 addition & 3 deletions resources/example_configs/EXAMPLE_NCTL_CONFIG.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@ cors_origin = ""
ip_address = "0.0.0.0"
port = 28102
max_message_size_bytes = 4194304
request_limit = 3
request_buffer_size = 16
message_timeout_secs = 30
client_access_timeout_secs = 2
client_access_timeout_secs = 10
keepalive_timeout_ms = 10_000

[rpc_server.node_client.exponential_backoff]
Expand Down
4 changes: 1 addition & 3 deletions resources/example_configs/EXAMPLE_NCTL_POSTGRES_CONFIG.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@ cors_origin = ""
ip_address = "0.0.0.0"
port = 28102
max_message_size_bytes = 4194304
request_limit = 3
request_buffer_size = 16
message_timeout_secs = 30
client_access_timeout_secs = 2
client_access_timeout_secs = 10
keepalive_timeout_ms = 10_000

[rpc_server.node_client.exponential_backoff]
Expand Down
4 changes: 1 addition & 3 deletions resources/example_configs/EXAMPLE_NODE_CONFIG.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@ cors_origin = ""
ip_address = "3.20.57.210"
port = 7777
max_message_size_bytes = 4194304
request_limit = 10
request_buffer_size = 50
message_timeout_secs = 60
client_access_timeout_secs = 60
client_access_timeout_secs = 10
keepalive_timeout_ms = 10_000

[rpc_server.node_client.exponential_backoff]
Expand Down
6 changes: 1 addition & 5 deletions resources/example_configs/default_debian_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,10 @@ ip_address = '127.0.0.1'
port = 7779
# Maximum size of a message in bytes.
max_message_size_bytes = 4_194_304
# Maximum number of in-flight node requests.
request_limit = 3
# Number of node requests that can be buffered.
request_buffer_size = 16
# Timeout for a node request in seconds.
message_timeout_secs = 30
# Timeout specifying how long to wait for binary port client to be available.
client_access_timeout_secs = 2
client_access_timeout_secs = 10
# The amount of time in milliseconds to wait between sending keepalive requests.
keepalive_timeout_ms = 10_000

Expand Down
2 changes: 1 addition & 1 deletion resources/example_configs/default_rpc_only_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ request_buffer_size = 16
# Timeout for a node request in seconds.
message_timeout_secs = 30
# Timeout specifying how long to wait for binary port client to be available.
client_access_timeout_secs = 2
client_access_timeout_secs = 10
# The amount of time in milliseconds to wait between sending keepalive requests.
keepalive_timeout_ms = 10_000

Expand Down
6 changes: 3 additions & 3 deletions resources/example_configs/default_sse_only_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ disable_event_persistence = false

[[sse_server.connections]]
ip_address = "127.0.0.1"
sse_port = 9999
rest_port = 8888
sse_port = 18101
rest_port = 14101
max_attempts = 10000
delay_between_retries_in_seconds = 5
allow_partial_connection = false
Expand All @@ -32,7 +32,7 @@ port = 18888
max_concurrent_requests = 50
max_requests_per_second = 50

[admin_server]
[admin_api_server]
enable_server = true
port = 18887
max_concurrent_requests = 1
Expand Down
Loading

0 comments on commit c89da6a

Please sign in to comment.