-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
200 bug cluster node initializes as healthy #252
base: main
Are you sure you want to change the base?
Changes from 15 commits
824a221
4298c01
ee6cf93
c0b14a0
a0e4141
705348d
7e8128a
e0946e0
15187ff
55aab7c
c2cf7f5
5e3329d
5c502e7
fc4d67e
5e375fa
3b3d399
0d410d4
131d13c
e470e1e
7e3b153
5c2151a
6ab1f12
a15c0c1
af1e9ed
a143f2d
7cc0a63
12ced0b
249c4bd
f4c5d6b
c94031c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,6 +73,9 @@ DATE_NOW=$(date +"%Y%m%d%H%M%S") | |
FALKORDB_LOG_FILE_PATH=$(if [[ $SAVE_LOGS_TO_FILE -eq 1 ]]; then echo $DATA_DIR/falkordb_$DATE_NOW.log; else echo ""; fi) | ||
NODE_CONF_FILE=$DATA_DIR/node.conf | ||
|
||
|
||
sleep 10 | ||
|
||
if [[ $OMNISTRATE_ENVIRONMENT_TYPE != "PROD" ]];then | ||
DEBUG=1 | ||
fi | ||
|
@@ -458,10 +461,6 @@ else | |
echo "Cluster does not exist. Waiting for it to be created" | ||
fi | ||
|
||
# Run this before health check to prevent client connections until discrepancies are resolved. | ||
meet_unknown_nodes | ||
ensure_replica_connects_to_the_right_master_ip | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are those removed? |
||
|
||
if [[ $RUN_HEALTH_CHECK -eq 1 ]]; then | ||
# Check if healthcheck binary exists | ||
|
@@ -473,6 +472,7 @@ if [[ $RUN_HEALTH_CHECK -eq 1 ]]; then | |
fi | ||
fi | ||
|
||
|
||
if [[ $RUN_METRICS -eq 1 ]]; then | ||
echo "Starting Metrics" | ||
exporter_url=$(if [[ $TLS == "true" ]]; then echo "rediss://$NODE_HOST:$NODE_PORT"; else echo "redis://localhost:$NODE_PORT"; fi) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,9 +9,9 @@ fn main() { | |
let args: Vec<String> = args().collect(); | ||
|
||
if args.len() > 1 && args[1] == "sentinel" { | ||
start_health_check_server(true); | ||
start_probes_check_server(true); | ||
} else { | ||
start_health_check_server(false); | ||
start_probes_check_server(false); | ||
} | ||
} | ||
|
||
|
@@ -26,7 +26,7 @@ fn main() { | |
/// # Arguments | ||
/// | ||
/// * `is_sentinel` - A boolean that indicates whether the health check server is for a Redis Sentinel instance. | ||
fn start_health_check_server(is_sentinel: bool) { | ||
fn start_probes_check_server(is_sentinel: bool) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Any reason for the name change? |
||
let port = if is_sentinel { | ||
env::var("HEALTH_CHECK_PORT_SENTINEL").unwrap_or_else(|_| "8082".to_string()) | ||
} else { | ||
|
@@ -38,14 +38,26 @@ fn start_health_check_server(is_sentinel: bool) { | |
let server = Server::new(addr, move |request| { | ||
router!(request, | ||
(GET) (/healthcheck) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe change the route to /liveness? |
||
let health = health_check_handler(is_sentinel).unwrap_or_else(|_| false); | ||
let health = probes_check_handler(is_sentinel,false,true).unwrap_or_else(|_| false); | ||
|
||
if health { | ||
Response::text("OK") | ||
} else { | ||
Response::text("Not ready").with_status_code(500) | ||
} | ||
}, | ||
(GET) (/readiness) => { | ||
let health = probes_check_handler(is_sentinel,true,false).unwrap_or_else(|_| false); | ||
|
||
if health { | ||
Response::text("OK") | ||
} else { | ||
Response::text("Not ready").with_status_code(500) | ||
} | ||
}, | ||
(GET) (/startup) => { | ||
Response::text("OK") | ||
}, | ||
_ => Response::empty_404() | ||
) | ||
}) | ||
|
@@ -74,7 +86,7 @@ fn start_health_check_server(is_sentinel: bool) { | |
/// # Errors | ||
/// | ||
/// The function returns a RedisError if there is an error connecting to the Redis server. | ||
fn health_check_handler(is_sentinel: bool) -> Result<bool, redis::RedisError> { | ||
fn probes_check_handler(is_sentinel: bool,readiness: bool, healthcheck: bool) -> Result<bool, redis::RedisError> { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think adding bool args is a clean solution. Maybe split into 3 functions and separate the common components into util functions. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That goes for all functions where you added bools as params. |
||
let password = match env::var("ADMIN_PASSWORD") { | ||
Ok(password) => password, | ||
Err(_) => { | ||
|
@@ -84,7 +96,7 @@ fn health_check_handler(is_sentinel: bool) -> Result<bool, redis::RedisError> { | |
.unwrap_or_else(|_| String::new()) | ||
} | ||
}; | ||
|
||
let node_port = if is_sentinel { | ||
env::var("SENTINEL_PORT").unwrap_or_else(|_| "26379".to_string()) | ||
} else { | ||
|
@@ -117,7 +129,7 @@ fn health_check_handler(is_sentinel: bool) -> Result<bool, redis::RedisError> { | |
let is_cluster = db_info.contains("cluster_enabled:1"); | ||
|
||
if is_cluster { | ||
return get_status_from_cluster_node(db_info, &mut con); | ||
return get_status_from_cluster_node(db_info, &mut con,readiness , healthcheck); | ||
} | ||
|
||
let role_regex = regex::Regex::new(r"role:(\w+)").unwrap(); | ||
|
@@ -130,9 +142,9 @@ fn health_check_handler(is_sentinel: bool) -> Result<bool, redis::RedisError> { | |
let role = role_matches.unwrap().get(1).unwrap().as_str(); | ||
|
||
if role == "master" { | ||
get_status_from_master(&db_info) | ||
get_status_from_master(&db_info,&mut con,readiness,healthcheck) | ||
} else { | ||
get_status_from_slave(&db_info) | ||
get_status_from_slave(&db_info,&mut con,readiness,healthcheck) | ||
} | ||
} | ||
|
||
|
@@ -153,13 +165,29 @@ fn health_check_handler(is_sentinel: bool) -> Result<bool, redis::RedisError> { | |
/// # Errors | ||
/// | ||
/// The function returns a RedisError if there is an error querying the Redis server. | ||
|
||
// I AM NOT SURE ABOUT CHANGING THIS FUNCTION | ||
fn get_status_from_cluster_node( | ||
_db_info: String, | ||
con: &mut redis::Connection, | ||
readiness: bool, | ||
healthcheck: bool, | ||
) -> Result<bool, redis::RedisError> { | ||
let cluster_info: String = redis::cmd("CLUSTER").arg("INFO").query(con)?; | ||
|
||
Ok(cluster_info.contains("cluster_state:ok")) | ||
if healthcheck { | ||
let cluster_state: bool = cluster_info.contains("cluster_state:ok"); | ||
let loading: bool = cluster_info.contains("LOADING"); | ||
let busy: bool = cluster_info.contains("BUSY"); // This might not exist in Redis. | ||
let master_down: bool = cluster_info.contains("MASTERDOWN"); | ||
if cluster_state || loading || busy || master_down { | ||
return Ok(true); | ||
} | ||
} else if readiness { | ||
return Ok(cluster_info.contains("cluster_state:ok")); | ||
} | ||
|
||
Ok(false) // Default return to avoid missing a return value | ||
} | ||
|
||
/// Checks the status of the Redis master. | ||
|
@@ -174,11 +202,20 @@ fn get_status_from_cluster_node( | |
/// # Returns | ||
/// | ||
/// A boolean value that indicates whether the Redis master is ready | ||
fn get_status_from_master(db_info: &str) -> Result<bool, redis::RedisError> { | ||
if db_info.contains("loading:1") { | ||
return Ok(false); | ||
fn get_status_from_master(db_info: &str,con: &mut redis::Connection,readiness: bool, healthcheck: bool) -> Result<bool, redis::RedisError> { | ||
let result : String = redis::cmd("PING").query(con)?; | ||
if healthcheck { | ||
if result.contains("PONG") || result.contains("LOADING") || result.contains("BUSY") || result.contains("MASTERDOWN"){ | ||
return Ok(true); | ||
} | ||
|
||
} else if readiness { | ||
if result.contains("PONG") && db_info.contains("loading:0") { | ||
return Ok(true); | ||
} | ||
} | ||
Ok(true) | ||
|
||
Ok(false) | ||
} | ||
|
||
/// Checks the status of the Redis slave. | ||
|
@@ -193,16 +230,21 @@ fn get_status_from_master(db_info: &str) -> Result<bool, redis::RedisError> { | |
/// # Returns | ||
/// | ||
/// A boolean value that indicates whether the Redis slave is ready | ||
fn get_status_from_slave(db_info: &str) -> Result<bool, redis::RedisError> { | ||
if db_info.contains("loading:1") { | ||
return Ok(false); | ||
} | ||
|
||
if !db_info.contains("master_link_status:up") || db_info.contains("master_sync_in_progress:1") { | ||
return Ok(false); | ||
fn get_status_from_slave(db_info: &str, con: &mut redis::Connection, readiness: bool,healthcheck: bool) -> Result<bool, redis::RedisError> { | ||
|
||
let result : String = redis::cmd("PING").query(con)?; | ||
if healthcheck { | ||
if result.contains("PONG") || result.contains("LOADING") || result.contains("BUSY") || result.contains("MASTERDOWN") { | ||
return Ok(true); | ||
} | ||
|
||
} else if readiness { | ||
if result.contains("PONG") && db_info.contains("loading:0") && db_info.contains("master_link_status:up") && db_info.contains("master_sync_in_progress:0") { | ||
return Ok(true); | ||
} | ||
} | ||
|
||
Ok(true) | ||
Ok(false) | ||
} | ||
|
||
/// Resolves the host using the dns_lookup crate. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It'd be nice to get an explanation for that