Skip to content

Commit

Permalink
[tmpnet] Deploy collectors with golang to simplify cross-repo use
Browse files Browse the repository at this point in the history
Previously, prometheus and promtail were installed and launched
with bash scripts. Migrating installation to nix and launch to golang
enables directly sharing the functionality with subnet-evm and
hypersdk. This removes the need to copy and maintain copies of the
scripts in multiple repos.
  • Loading branch information
maru-ava committed Feb 2, 2025
1 parent 827d7ea commit 15c496b
Show file tree
Hide file tree
Showing 9 changed files with 279 additions and 225 deletions.
21 changes: 4 additions & 17 deletions .github/actions/run-monitored-tmpnet-cmd/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,6 @@ inputs:
runs:
using: composite
steps:
- name: Start prometheus
# Only run for the original repo; a forked repo won't have access to the monitoring credentials
if: (inputs.prometheus_username != '')
shell: bash
# Assumes calling project has a nix flake that ensures a compatible prometheus
run: nix develop --impure --command bash -x ./scripts/run_prometheus.sh
env:
PROMETHEUS_USERNAME: ${{ inputs.prometheus_username }}
PROMETHEUS_PASSWORD: ${{ inputs.prometheus_password }}
- name: Start promtail
if: (inputs.prometheus_username != '')
shell: bash
# Assumes calling project has a nix flake that ensures a compatible promtail
run: nix develop --impure --command bash -x ./scripts/run_promtail.sh
env:
LOKI_USERNAME: ${{ inputs.loki_username }}
LOKI_PASSWORD: ${{ inputs.loki_password }}
- name: Notify of metrics availability
if: (inputs.prometheus_username != '')
shell: bash
Expand All @@ -65,6 +48,10 @@ runs:
shell: bash
run: ${{ inputs.run }}
env:
LOKI_USERNAME: ${{ inputs.loki_username }}
LOKI_PASSWORD: ${{ inputs.loki_password }}
PROMETHEUS_USERNAME: ${{ inputs.prometheus_username }}
PROMETHEUS_PASSWORD: ${{ inputs.prometheus_password }}
GH_REPO: ${{ inputs.repository_owner }}/${{ inputs.repository_name }}
GH_WORKFLOW: ${{ inputs.workflow }}
GH_RUN_ID: ${{ inputs.run_id }}
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
- name: Run e2e tests
uses: ./.github/actions/run-monitored-tmpnet-cmd
with:
run: E2E_SERIAL=1 ./scripts/tests.e2e.sh --delay-network-shutdown
run: E2E_SERIAL=1 nix develop --impure --command bash -x ./scripts/tests.e2e.sh --enable-collectors
prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }}
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
loki_username: ${{ secrets.LOKI_ID || '' }}
Expand All @@ -87,7 +87,7 @@ jobs:
- name: Run e2e tests with existing network
uses: ./.github/actions/run-monitored-tmpnet-cmd
with:
run: E2E_SERIAL=1 ./scripts/tests.e2e.existing.sh --delay-network-shutdown
run: E2E_SERIAL=1 nix develop --impure --command bash -x ./scripts/tests.e2e.existing.sh --enable-collectors
prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }}
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
loki_username: ${{ secrets.LOKI_ID || '' }}
Expand All @@ -112,8 +112,8 @@ jobs:
- name: Run e2e tests
uses: ./.github/actions/run-monitored-tmpnet-cmd
with:
run: ./scripts/tests.upgrade.sh
filter_by_owner: avalanchego-e2e
run: nix develop --impure --command bash -x ./scripts/tests.upgrade.sh
filter_by_owner: avalanchego-upgrade
prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }}
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
loki_username: ${{ secrets.LOKI_ID || '' }}
Expand Down
93 changes: 0 additions & 93 deletions scripts/run_prometheus.sh

This file was deleted.

91 changes: 0 additions & 91 deletions scripts/run_promtail.sh

This file was deleted.

4 changes: 4 additions & 0 deletions tests/fixture/e2e/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
}
}

if flagVars.EnableCollectors() {
require.NoError(tmpnet.EnsureCollectorsRunning(tc.Log()))
}

// Start a new network
if network == nil {
network = desiredNetwork
Expand Down
36 changes: 18 additions & 18 deletions tests/fixture/e2e/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,16 @@ import (
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
)

// Ensure that this value takes into account the scrape_interval
// defined in scripts/run_prometheus.sh.
const networkShutdownDelay = 12 * time.Second

type FlagVars struct {
avalancheGoExecPath string
pluginDir string
networkDir string
reuseNetwork bool
delayNetworkShutdown bool
startNetwork bool
stopNetwork bool
restartNetwork bool
nodeCount int
avalancheGoExecPath string
pluginDir string
networkDir string
reuseNetwork bool
enableCollectors bool
startNetwork bool
stopNetwork bool
restartNetwork bool
nodeCount int
}

func (v *FlagVars) AvalancheGoExecPath() string {
Expand Down Expand Up @@ -54,10 +50,14 @@ func (v *FlagVars) RestartNetwork() bool {
return v.restartNetwork
}

func (v *FlagVars) EnableCollectors() bool {
return v.enableCollectors
}

func (v *FlagVars) NetworkShutdownDelay() time.Duration {
if v.delayNetworkShutdown {
if v.enableCollectors {
// Only return a non-zero value if the delay is enabled.
return networkShutdownDelay
return tmpnet.NetworkShutdownDelay
}
return 0
}
Expand Down Expand Up @@ -121,10 +121,10 @@ func RegisterFlags() *FlagVars {
"[optional] restart an existing network previously started with --reuse-network. Useful for ensuring a network is running with the current state of binaries on disk. Ignored if a network is not already running or --stop-network is provided.",
)
flag.BoolVar(
&vars.delayNetworkShutdown,
"delay-network-shutdown",
&vars.enableCollectors,
"enable-collectors",
false,
"[optional] whether to delay network shutdown to allow a final metrics scrape.",
"[optional] whether to enable collectors of logs and metrics from nodes of the temporary network.",
)
flag.BoolVar(
&vars.startNetwork,
Expand Down
2 changes: 1 addition & 1 deletion tests/fixture/e2e/metrics_link.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ var _ = ginkgo.AfterEach(func() {
// Extend the end time by the shutdown delay (a proxy for the metrics
// scrape interval) to maximize the chances of the specified duration
// including all metrics relevant to the current spec.
endTime := time.Now().Add(networkShutdownDelay).UnixMilli()
endTime := time.Now().Add(tmpnet.NetworkShutdownDelay).UnixMilli()
metricsLink := tmpnet.MetricsLinkForNetwork(
env.GetNetwork().UUID,
strconv.FormatInt(startTime, 10),
Expand Down
7 changes: 6 additions & 1 deletion tests/fixture/tmpnet/node_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,12 @@ func (p *NodeProcess) getProcess() (*os.Process, error) {
return nil, nil
}

proc, err := os.FindProcess(p.pid)
return getProcess(p.pid)
}

// getProcess retrieves the process if it is running.
func getProcess(pid int) (*os.Process, error) {
proc, err := os.FindProcess(pid)
if err != nil {
return nil, fmt.Errorf("failed to find process: %w", err)
}
Expand Down
Loading

0 comments on commit 15c496b

Please sign in to comment.