Skip to content

Commit

Permalink
version bump QOL and fixed connections
Browse files Browse the repository at this point in the history
  • Loading branch information
louisponet committed May 25, 2023
1 parent 07bf9ab commit d122f07
Show file tree
Hide file tree
Showing 5 changed files with 341 additions and 175 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "RemoteHPC"
uuid = "c4f2a1c4-7655-40d8-82ee-c6ae0a8b7409"
authors = ["louisponet <[email protected]>"]
version = "0.3.27"
version = "0.3.28"

[deps]
BinaryTraits = "190e46ec-f771-4705-b939-984896f7be0e"
Expand Down
208 changes: 125 additions & 83 deletions src/client.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,108 +4,150 @@
Launches the daemon process on the host [`Server`](@ref) `s`.
"""
function start(s::Server; verbosity=0)
if !islocal(s) && !isalive(LOCAL_SERVER[])
@warn "Local server not running. Starting that first."
start(LOCAL_SERVER[])
if !isalive(LOCAL_SERVER[])
error("Couldn't start local server.")
end
end
alive = isalive(s)
@assert !alive "Server is already up and running."
@debug "Starting:\n$s"
hostname = gethostname(s)
conf_path = config_path(s)
t = server_command(s, "ls $(conf_path)")
if t.exitcode != 0
error("RemoteHPC not installed on server. Install it using `RemoteHPC.install_RemoteHPC(Server(\"$(s.name)\"))`")
end

if islocal(s)
t = ispath(config_path("self_destruct"))
else
cmd = "cat $(conf_path)/$hostname/self_destruct"
t = server_command(s, cmd).exitcode == 0
end

@assert !t "Self destruction was previously triggered, signalling issues on the Server.\nPlease investigate and if safe, remove $(conf_path)/self_destruct"

title = "Starting Server($(s.name))"
steps = ["Verifying that local server is running",
"Verifying that the server isn't already alive",
"Starting server",
"Waiting for server connection"]

StepSpinner(title, steps) do spinner


if !islocal(s) && !isalive(LOCAL_SERVER[])
push!(spinner, "Starting local server.")
start(LOCAL_SERVER[])
if !isalive(LOCAL_SERVER[])
finish!(spinner, ErrorException("Couldn't start local server."))
end
end
push!(spinner, "local server running")

if !islocal(s)
t = deepcopy(s)
t.domain = "localhost"
t.name = hostname
tf = tempname()
JSON3.write(tf, t)
push(tf, s, "$(conf_path)/$hostname/storage/servers/$hostname.json")
end
next!(spinner)

alive = isalive(s) || get(JSON3.read(check_connections(names=[s.name]).body), Symbol(s.name), false)
if alive
push!(spinner, "Server is already up and running.")
finish!(spinner)
return
end

# Here we check what the modify time of the server-side localhost file is.
# The server will rewrite the file with the correct port, which we use to see
# whether the server started successfully.
function checktime()
curtime = 0
next!(spinner)

hostname = gethostname(s)
conf_path = config_path(s)
t = server_command(s, "ls $(conf_path)")

if t.exitcode != 0

finish!(spinner, ErrorException("RemoteHPC not installed on server. Install it using `RemoteHPC.install(Server(\"$(s.name)\"))`"))

end

if islocal(s)
return mtime(config_path("storage", "servers", "$(hostname).json"))
self_destructed = ispath(config_path("self_destruct"))

else
cmd = "stat -c %Z $(conf_path)/$hostname/storage/servers/$(hostname).json"
return parse(Int, server_command(s.username, s.domain, cmd)[1])
cmd = "cat $(conf_path)/$hostname/self_destruct"
self_destructed = server_command(s, cmd).exitcode == 0

end
return curtime
end
firstime = checktime()

if self_destructed
finish!(spinner,
ErrorException("""Self destruction was previously triggered, signalling issues on the Server.
Please investigate and if safe, remove $(conf_path)/self_destruct"""))
end

if !islocal(s)
t = deepcopy(s)
t.domain = "localhost"
t.name = hostname
tf = tempname()
JSON3.write(tf, t)
push(tf, s, "$(conf_path)/$hostname/storage/servers/$hostname.json")
end

# Here we check what the modify time of the server-side localhost file is.
# The server will rewrite the file with the correct port, which we use to see
# whether the server started successfully.
function checktime()
curtime = 0
if islocal(s)
return mtime(config_path("storage", "servers", "$(hostname).json"))
else
cmd = "stat -c %Z $(conf_path)/$hostname/storage/servers/$(hostname).json"
return parse(Int, server_command(s.username, s.domain, cmd)[1])
end
return curtime
end
firstime = checktime()

p = "$(conf_path)/$hostname/logs/errors.log"
scrpt = "using RemoteHPC; RemoteHPC.julia_main(verbose=$(verbosity))"
if s.domain != "localhost"
julia_cmd = replace("""$(s.julia_exec) --project=$(conf_path) --startup-file=no -t 10 -e "using RemoteHPC; RemoteHPC.julia_main(verbose=$(verbosity))" &> $p""",
"'" => "")
if Sys.which("ssh") === nothing
OpenSSH_jll.ssh() do ssh_exec
run(Cmd(`$ssh_exec -f $(ssh_string(s)) $julia_cmd`; detach = true))
p = "$(conf_path)/$hostname/logs/errors.log"
scrpt = "using RemoteHPC; RemoteHPC.julia_main(verbose=$(verbosity))"

if s.domain != "localhost"
julia_cmd = replace("""$(s.julia_exec) --project=$(conf_path) --startup-file=no -t 10 -e "using RemoteHPC; RemoteHPC.julia_main(verbose=$(verbosity))" &> $p""",
"'" => "")
if Sys.which("ssh") === nothing
OpenSSH_jll.ssh() do ssh_exec
run(Cmd(`$ssh_exec -f $(ssh_string(s)) $julia_cmd`; detach = true))
end
else
run(Cmd(`ssh -f $(ssh_string(s)) $julia_cmd`; detach = true))
end
else
run(Cmd(`ssh -f $(ssh_string(s)) $julia_cmd`; detach = true))
e = s.julia_exec * " --project=$(conf_path)"
julia_cmd = Cmd([string.(split(e))..., "--startup-file=no", "-t", "auto", "-e",
scrpt, "&>", p, "&"])
run(Cmd(julia_cmd; detach = true); wait = false)
end
else
e = s.julia_exec * " --project=$(conf_path)"
julia_cmd = Cmd([string.(split(e))..., "--startup-file=no", "-t", "auto", "-e",
scrpt, "&>", p, "&"])
run(Cmd(julia_cmd; detach = true); wait = false)
end

#TODO: little hack here
retries = 0
prog = ProgressUnknown("Waiting for server bootup:"; spinner = true)
while checktime() <= firstime && retries < 60
ProgressMeter.next!(prog; spinner = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏", showvalues = [(:try, retries)])
retries += 1
sleep(1)
end
finish!(prog)

if retries == 60
error("Something went wrong starting the server.")
else
retries = 0
push!(spinner, "Waiting for server bootup")

while checktime() <= firstime && retries < 60
retries += 1
sleep(1)
end

if retries == 60
finish!(spinner, ErrorException("Something went wrong starting the server."))
end

next!(spinner)

cfg = load_config(s)
s.port = cfg.port
s.uuid = cfg.uuid

@debug "Daemon on Server $(s.name) started, listening on local port $(s.port)."
@debug "Saving updated server info..."
save(s)
end
if islocal(s)
while !isalive(s)
sleep(0.1)

retries = 0

if islocal(s)
while !isalive(s) && retries < 60
sleep(0.1)
retries += 1
end
LOCAL_SERVER[] = local_server()

else
check_connections(; names=[s.name])
while !isalive(s) && retries < 60
sleep(0.1)
end
end
LOCAL_SERVER[] = local_server()
else
check_connections(; names=[s.name])
while !isalive(s)
sleep(0.1)

if retries == 60
finish!(spinner, ErrorException("""Couldn't set up server connection.
This can be because the daemon crashed
or because the local server can't setup a ssh tunnel to it"""))
end

return s
end
return s
end

"""
Expand Down
29 changes: 17 additions & 12 deletions src/runtime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -279,12 +279,13 @@ end

function check_connections!(connections, verify_tunnels; names=keys(connections))
@debug "Checking connections..." logtype=RuntimeLog
for (n, connected) in connections

for n in names
if !exists(Server(name=n))
pop!(connections, n)
continue
end
!(n in names) && continue

s = load(Server(n))
s.domain == "localhost" && continue

Expand All @@ -296,22 +297,20 @@ function check_connections!(connections, verify_tunnels; names=keys(connections)
connections[n] = false
end
end

if verify_tunnels
@debugv 1 "Verifying tunnels" logtype=RuntimeLog
for (n, connected) in connections
connected && continue
!(n in names) && continue
for n in names

connections[n] && continue

s = load(Server(n))
s.domain == "localhost" && continue

if find_tunnel(s) !== nothing
@debugv 0 "Couldn't connect to Server $n but tunnel exists so ignoring. Destroy it manually to try creating a new one and reconnect."
continue
end

@debugv 0 "Connection to $n: $(connections[n])" logtype=RuntimeLog

connections[n] = @timeout 30 begin
destroy_tunnel(s)
try
remote_server = load_config(s.username, s.domain, config_path(s))
remote_server === nothing && return false
Expand All @@ -336,20 +335,26 @@ function check_connections!(connections, verify_tunnels; names=keys(connections)
end false
end
end

return connections
end

"""
    check_connections!(server_data::ServerData, args...; kwargs...)

Bring `server_data.connections` in line with the collection that `load` returns for a
`Server` with an empty name, then run `check_connections!` on that connections
container, forwarding `args` and `kwargs` unchanged.

Keys that are no longer part of the loaded collection are deleted. Every loaded entry
other than `server_data.server.name` is given a slot, keeping its current value or
defaulting to `false` when it had none. The value of the forwarded call is returned.
"""
function check_connections!(server_data::ServerData, args...; kwargs...)
    known = load(Server(""))
    conns = server_data.connections

    # Collect the stale keys up front so the container is never mutated while
    # it is being iterated.
    stale = [key for key in keys(conns) if !(key in known)]
    foreach(key -> delete!(conns, key), stale)

    for name in known
        # The server's own name is not given a slot here.
        if name != server_data.server.name
            conns[name] = get(conns, name, false)
        end
    end

    result = check_connections!(conns, args...; kwargs...)

    @debugv 1 "Connections: $(server_data.connections)" logtype=RuntimeLog

    return result
end

Expand All @@ -376,10 +381,10 @@ function julia_main(;verbose=0, kwargs...)::Cint
connection_task = Threads.@spawn @stoppable server_data.stop begin
@debug "Checking Connections" logtype=RuntimeLog
try
check_connections!(server_data, true)
check_connections!(server_data, false)
while true
t = time()
check_connections!(server_data, true)
check_connections!(server_data, false)
sleep_t = 5 - (time() - t)
if sleep_t > 0
sleep(sleep_t)
Expand Down
Loading

2 comments on commit d122f07

@louisponet
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/84212

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.28 -m "<description of version>" d122f0798c60bde5a157924549b38c6909cbb84e
git push origin v0.3.28

Please sign in to comment.