Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

overlord/state, tests: measure state lock #14874

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ jobs:
- { go-build-tags: withbootassetstesting, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: nosecboot, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: faultinject, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: statelocktrace, skip-coverage: true, snapd-debug: false, go-test-race: false}
- { go-build-tags: snapdusergo, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: "", skip-coverage: true, snapd-debug: false, go-test-race: true }

Expand Down
4 changes: 2 additions & 2 deletions build-aux/snap/snapcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -366,14 +366,14 @@ parts:
TAGS+=(nomanagers)
case "${VERSION}" in
1337.*)
TAGS+=(withtestkeys faultinject)
TAGS+=(withtestkeys faultinject statelocktrace)
;;
esac
;;
*)
case "${VERSION}" in
1337.*)
TAGS+=(withtestkeys withbootassetstesting faultinject)
TAGS+=(withtestkeys withbootassetstesting faultinject statelocktrace)
;;
esac
;;
Expand Down
6 changes: 6 additions & 0 deletions osutil/flock.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ func NewFileLockWithMode(path string, mode os.FileMode) (*FileLock, error) {
return l, nil
}

// NewFileLockWithFile wraps an open file with a file lock which can be used for
// locking.
func NewFileLockWithFile(f *os.File) *FileLock {
sergiocazzolato marked this conversation as resolved.
Show resolved Hide resolved
return &FileLock{file: f}
}
sergiocazzolato marked this conversation as resolved.
Show resolved Hide resolved

// NewFileLock creates and opens the lock file given by "path" with mode 0600.
func NewFileLock(path string) (*FileLock, error) {
return NewFileLockWithMode(path, 0600)
Expand Down
13 changes: 13 additions & 0 deletions osutil/flock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,19 @@ func (s *flockSuite) TestNewFileLock(c *C) {
c.Assert(fi.Mode().Perm(), Equals, os.FileMode(0600))
}

// Test that opening and closing a lock works as expected, and that the mode is right.
func (s *flockSuite) TestNewFileLockWithFile(c *C) {
myfile, err := os.OpenFile(filepath.Join(c.MkDir(), "name"), os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
c.Assert(err, IsNil)

lock := osutil.NewFileLockWithFile(myfile)
defer lock.Close()

fi, err := os.Stat(lock.Path())
c.Assert(err, IsNil)
c.Assert(fi.Mode().Perm(), Equals, os.FileMode(0600))
}

// Test that we can access the underlying open file.
func (s *flockSuite) TestFile(c *C) {
fname := filepath.Join(c.MkDir(), "name")
Expand Down
121 changes: 121 additions & 0 deletions overlord/state/locktrace.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// -*- Mode: Go; indent-tabs-mode: t -*-
//go:build statelocktrace

/*
* Copyright (C) 2021 Canonical Ltd
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* Copyright (C) 2021 Canonical Ltd
* Copyright (C) 2025 Canonical Ltd

*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

package state

import (
"fmt"
"os"
"runtime"
"time"

"github.com/snapcore/snapd/osutil"
)

var (
traceStateLock = false

traceThreshold = int64(0)
traceFilePath = ""
)

func init() {
if !osutil.GetenvBool("SNAPPY_TESTING") {
return
}

threshold := osutil.GetenvInt64("SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS")
logFilePath := os.Getenv("SNAPD_STATE_LOCK_TRACE_FILE")

if threshold <= 0 || logFilePath == "" {
return
}

traceThreshold = threshold
traceFilePath = logFilePath
traceStateLock = true
}

func traceCallers(ts, heldMs, waitMs int64) error {
if traceFilePath == "" {
return fmt.Errorf("internal error: trace file path unset")
}

logFile, err := os.OpenFile(traceFilePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
if err != nil {
return fmt.Errorf("cannot not open/create log trace file: %v", err)
}
lockFile := osutil.NewFileLockWithFile(logFile)
defer lockFile.Close()

if err := lockFile.Lock(); err != nil {
return fmt.Errorf("cannot take file lock: %v", err)
}

pc := make([]uintptr, 10)
// avoid 3 first callers on the stack: runtime.Callers(), this function and the parent
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick

Suggested change
// avoid 3 first callers on the stack: runtime.Callers(), this function and the parent
// ignore the first 3 callers on the stack: runtime.Callers(), this function and the parent

n := runtime.Callers(3, pc)
frames := runtime.CallersFrames(pc[:n])

_, err = fmt.Fprintf(logFile, "### %s lock: held: %d ms wait %d ms\n",
time.UnixMilli(ts),
heldMs, waitMs)
if err != nil {
return err
}

for {
frame, more := frames.Next()
_, err := fmt.Fprintf(logFile, "%s:%d %s\n", frame.File, frame.Line, frame.Function)
if err != nil {
return err
}

if !more {
break
}
}

return nil
}

func lockTimestamp() int64 {
if !traceStateLock {
return 0
}

return time.Now().UnixMilli()
}

// maybeSaveLockTime allows to save lock times when this overpass the threshold
// defined by through the SNAPD_STATE_LOCK_THRESHOLD_MS environment settings.
Comment on lines +107 to +108
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor suggestion for clarity's sake

Suggested change
// maybeSaveLockTime allows to save lock times when this overpass the threshold
// defined by through the SNAPD_STATE_LOCK_THRESHOLD_MS environment settings.
// maybeSaveLockTime logs the lock hold and wait times when either exceeds the
// threshold defined in the SNAPD_STATE_LOCK_THRESHOLD_MS environment variable.

func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) {
if !traceStateLock {
return
}

heldMs := now - lockHoldStart
waitMs := lockHoldStart - lockWaitStart
if heldMs > traceThreshold || waitMs > traceThreshold {
if err := traceCallers(now, heldMs, waitMs); err != nil {
fmt.Fprintf(os.Stderr, "could write state lock trace: %v\n", err)
}
}
}
28 changes: 28 additions & 0 deletions overlord/state/locktrace_fake.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// -*- Mode: Go; indent-tabs-mode: t -*-
//go:build !statelocktrace

/*
* Copyright (C) 2021 Canonical Ltd
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* Copyright (C) 2021 Canonical Ltd
* Copyright (C) 2024 Canonical Ltd

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* Copyright (C) 2021 Canonical Ltd
* Copyright (C) 2024 Canonical Ltd

*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

package state

func lockTimestamp() int64 {
return int64(0)
}

func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) {
}
Comment on lines +23 to +28
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tiny nitpick, we can simplify the no-ops

Suggested change
func lockTimestamp() int64 {
return int64(0)
}
func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) {
}
func lockTimestamp() int64 { return 0 }
func maybeSaveLockTime(int64, int64, int64) {}

10 changes: 10 additions & 0 deletions overlord/state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ type State struct {
// task/changes observing
taskHandlers map[int]func(t *Task, old, new Status) (remove bool)
changeHandlers map[int]func(chg *Change, old, new Status)

lockWaitStart int64
lockHoldStart int64
}

// New returns a new empty state.
Expand Down Expand Up @@ -139,8 +142,11 @@ func (s *State) Modified() bool {

// Lock acquires the state lock.
func (s *State) Lock() {
lockWait := lockTimestamp()
s.mu.Lock()
atomic.AddInt32(&s.muC, 1)
s.lockWaitStart = lockWait
s.lockHoldStart = lockTimestamp()
}

func (s *State) reading() {
Expand All @@ -158,7 +164,11 @@ func (s *State) writing() {

func (s *State) unlock() {
atomic.AddInt32(&s.muC, -1)
lockWaitStart, lockHoldStart := s.lockWaitStart, s.lockHoldStart
s.lockWaitStart, s.lockHoldStart = 0, 0
lockHoldEnd := lockTimestamp()
s.mu.Unlock()
maybeSaveLockTime(lockWaitStart, lockHoldStart, lockHoldEnd)
}

type marshalledState struct {
Expand Down
4 changes: 2 additions & 2 deletions packaging/ubuntu-16.04/rules
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ _SNAP_TAGS=
# check if we need to include the testkeys in the binary
ifneq (,$(filter testkeys,$(DEB_BUILD_OPTIONS)))
# if enabled also enable bootloader assets testing and fault injection
_TAGS := withtestkeys,withbootassetstesting,faultinject
_SNAP_TAGS := nomanagers,withtestkeys,faultinject
_TAGS := withtestkeys,withbootassetstesting,faultinject,statelocktrace
_SNAP_TAGS := nomanagers,withtestkeys,faultinject,statelocktrace
else
_SNAP_TAGS=nomanagers
endif
Expand Down
2 changes: 2 additions & 0 deletions spread.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ environment:
SNAPD_DEB_FROM_REPO: '$(HOST: echo "${SPREAD_SNAPD_DEB_FROM_REPO:-true}")'
# Directory where the built snapd snaps and other assets are stored
SNAPD_WORK_DIR: '$(HOST: echo "${SPREAD_SNAPD_WORK_DIR:-/var/tmp/work-dir}")'
# Threshold used to determine which stake locks have to be collected during runtime
SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS: '$(HOST: echo "${SPREAD_SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS:-0}")'

# Directory where the nested images and test assets are stored
NESTED_WORK_DIR: '$(HOST: echo "${NESTED_WORK_DIR:-/var/tmp/work-dir}")'
Expand Down
4 changes: 3 additions & 1 deletion tests/lib/prepare-restore.sh
Original file line number Diff line number Diff line change
Expand Up @@ -707,14 +707,16 @@ prepare_suite_each() {
fi

if [[ "$variant" = full ]]; then
# shellcheck source=tests/lib/prepare.sh
. "$TESTSLIB"/prepare.sh
# shellcheck source=tests/lib/prepare.sh
. "$TESTSLIB"/prepare.sh
if os.query is-classic; then
prepare_each_classic
else
prepare_each_core
fi

prepare_state_lock "$SPREAD_JOB"
fi

case "$SPREAD_SYSTEM" in
Expand Down
35 changes: 35 additions & 0 deletions tests/lib/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ prepare_classic() {
fi

prepare_reexec_override
prepare_state_lock "SNAPD PROJECT"
prepare_memory_limit_override
disable_refreshes

Expand Down Expand Up @@ -1524,6 +1525,39 @@ EOF
rm -rf "$UNPACK_DIR"
}

prepare_state_lock(){
TAG=$1
CONF_FILE="/etc/systemd/system/snapd.service.d/state-lock.conf"
LOCKS_FILE="$TESTSTMP"/snapd_lock_traces
RESTART=false

if [ "$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" -gt 0 ]; then
echo "###START: $TAG" >> "$LOCKS_FILE"

# Generate the config file when it does not exist and when the threshold has changed different
if ! [ -f "$CONF_FILE" ] || ! grep -q "SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS=$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" < "$CONF_FILE"; then
echo "Prepare snapd for getting state lock time"
cat <<EOF > "$CONF_FILE"
[Service]
Environment=SNAPPY_TESTING=1
Environment=SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS="$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS"
Environment=SNAPD_STATE_LOCK_TRACE_FILE="$LOCKS_FILE"
EOF
RESTART=true
fi
elif [ -f "$CONF_FILE" ]; then
rm -f "$CONF_FILE"
RESTART=true
fi

if [ "$RESTART" = "true" ]; then
# the service setting may have changed in the service so we need
# to ensure snapd is reloaded
systemctl daemon-reload
systemctl restart snapd
fi
}

# prepare_ubuntu_core will prepare ubuntu-core 16+
prepare_ubuntu_core() {
# we are still a "classic" image, prepare the surgery
Expand Down Expand Up @@ -1637,6 +1671,7 @@ prepare_ubuntu_core() {
# or restore will break
remove_disabled_snaps
prepare_memory_limit_override
prepare_state_lock "SNAPD PROJECT"
setup_experimental_features
systemctl stop snapd.service snapd.socket
save_snapd_state
Expand Down
16 changes: 16 additions & 0 deletions tests/main/snapd-state-lock/task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
summary: smoke test used to retrieve the lock state times

details: |
Test used to collect artifacts

priority: -1

artifacts:
- snapd_lock_traces

execute: |
if [ -f "$TESTSTMP"/snapd_lock_traces ]; then
cp -f "$TESTSTMP"/snapd_lock_traces .
else
touch snapd_lock_traces
fi
Loading