diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 02140993a49..72b3cf6f444 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -203,6 +203,7 @@ jobs:
- { go-build-tags: withbootassetstesting, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: nosecboot, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: faultinject, skip-coverage: false, snapd-debug: false, go-test-race: false}
+ - { go-build-tags: statelocktrace, skip-coverage: true, snapd-debug: false, go-test-race: false}
- { go-build-tags: snapdusergo, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: "", skip-coverage: true, snapd-debug: false, go-test-race: true }
diff --git a/build-aux/snap/snapcraft.yaml b/build-aux/snap/snapcraft.yaml
index 9d53c2c7417..316d7e52d2f 100644
--- a/build-aux/snap/snapcraft.yaml
+++ b/build-aux/snap/snapcraft.yaml
@@ -366,14 +366,14 @@ parts:
TAGS+=(nomanagers)
case "${VERSION}" in
1337.*)
- TAGS+=(withtestkeys faultinject)
+ TAGS+=(withtestkeys faultinject statelocktrace)
;;
esac
;;
*)
case "${VERSION}" in
1337.*)
- TAGS+=(withtestkeys withbootassetstesting faultinject)
+ TAGS+=(withtestkeys withbootassetstesting faultinject statelocktrace)
;;
esac
;;
diff --git a/osutil/flock.go b/osutil/flock.go
index b3a3fd74e64..ac81eb1c5c8 100644
--- a/osutil/flock.go
+++ b/osutil/flock.go
@@ -55,6 +55,12 @@ func NewFileLockWithMode(path string, mode os.FileMode) (*FileLock, error) {
return l, nil
}
+// NewFileLockWithFile wraps an open file with a file lock which can be used for
+// locking.
+func NewFileLockWithFile(f *os.File) *FileLock {
+ return &FileLock{file: f}
+}
+
// NewFileLock creates and opens the lock file given by "path" with mode 0600.
func NewFileLock(path string) (*FileLock, error) {
return NewFileLockWithMode(path, 0600)
diff --git a/osutil/flock_test.go b/osutil/flock_test.go
index 47826c4c6f9..3ad63a64d76 100644
--- a/osutil/flock_test.go
+++ b/osutil/flock_test.go
@@ -83,6 +83,19 @@ func (s *flockSuite) TestNewFileLock(c *C) {
c.Assert(fi.Mode().Perm(), Equals, os.FileMode(0600))
}
+// Test that opening and closing a lock works as expected, and that the mode is right.
+func (s *flockSuite) TestNewFileLockWithFile(c *C) {
+ myfile, err := os.OpenFile(filepath.Join(c.MkDir(), "name"), os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
+ c.Assert(err, IsNil)
+
+ lock := osutil.NewFileLockWithFile(myfile)
+ defer lock.Close()
+
+ fi, err := os.Stat(lock.Path())
+ c.Assert(err, IsNil)
+ c.Assert(fi.Mode().Perm(), Equals, os.FileMode(0600))
+}
+
// Test that we can access the underlying open file.
func (s *flockSuite) TestFile(c *C) {
fname := filepath.Join(c.MkDir(), "name")
diff --git a/overlord/state/locktrace.go b/overlord/state/locktrace.go
new file mode 100644
index 00000000000..9c2877d2603
--- /dev/null
+++ b/overlord/state/locktrace.go
@@ -0,0 +1,121 @@
+// -*- Mode: Go; indent-tabs-mode: t -*-
+//go:build statelocktrace
+
+/*
+ * Copyright (C) 2021 Canonical Ltd
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ *
+ */
+
+package state
+
+import (
+ "fmt"
+ "os"
+ "runtime"
+ "time"
+
+ "github.com/snapcore/snapd/osutil"
+)
+
+var (
+ traceStateLock = false
+
+ traceThreshold = int64(0)
+ traceFilePath = ""
+)
+
+func init() {
+ if !osutil.GetenvBool("SNAPPY_TESTING") {
+ return
+ }
+
+ threshold := osutil.GetenvInt64("SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS")
+ logFilePath := os.Getenv("SNAPD_STATE_LOCK_TRACE_FILE")
+
+ if threshold <= 0 || logFilePath == "" {
+ return
+ }
+
+ traceThreshold = threshold
+ traceFilePath = logFilePath
+ traceStateLock = true
+}
+
+func traceCallers(ts, heldMs, waitMs int64) error {
+ if traceFilePath == "" {
+ return fmt.Errorf("internal error: trace file path unset")
+ }
+
+ logFile, err := os.OpenFile(traceFilePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
+ if err != nil {
+ return fmt.Errorf("cannot not open/create log trace file: %v", err)
+ }
+ lockFile := osutil.NewFileLockWithFile(logFile)
+ defer lockFile.Close()
+
+ if err := lockFile.Lock(); err != nil {
+ return fmt.Errorf("cannot take file lock: %v", err)
+ }
+
+ pc := make([]uintptr, 10)
+ // avoid 3 first callers on the stack: runtime.Callers(), this function and the parent
+ n := runtime.Callers(3, pc)
+ frames := runtime.CallersFrames(pc[:n])
+
+ _, err = fmt.Fprintf(logFile, "### %s lock: held: %d ms wait %d ms\n",
+ time.UnixMilli(ts),
+ heldMs, waitMs)
+ if err != nil {
+ return err
+ }
+
+ for {
+ frame, more := frames.Next()
+ _, err := fmt.Fprintf(logFile, "%s:%d %s\n", frame.File, frame.Line, frame.Function)
+ if err != nil {
+ return err
+ }
+
+ if !more {
+ break
+ }
+ }
+
+ return nil
+}
+
+func lockTimestamp() int64 {
+ if !traceStateLock {
+ return 0
+ }
+
+ return time.Now().UnixMilli()
+}
+
+// maybeSaveLockTime allows to save lock times when this overpass the threshold
+// defined by through the SNAPD_STATE_LOCK_THRESHOLD_MS environment settings.
+func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) {
+ if !traceStateLock {
+ return
+ }
+
+ heldMs := now - lockHoldStart
+ waitMs := lockHoldStart - lockWaitStart
+ if heldMs > traceThreshold || waitMs > traceThreshold {
+ if err := traceCallers(now, heldMs, waitMs); err != nil {
+ fmt.Fprintf(os.Stderr, "could write state lock trace: %v\n", err)
+ }
+ }
+}
diff --git a/overlord/state/locktrace_fake.go b/overlord/state/locktrace_fake.go
new file mode 100644
index 00000000000..577ad0bc2d2
--- /dev/null
+++ b/overlord/state/locktrace_fake.go
@@ -0,0 +1,28 @@
+// -*- Mode: Go; indent-tabs-mode: t -*-
+//go:build !statelocktrace
+
+/*
+ * Copyright (C) 2021 Canonical Ltd
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ *
+ */
+
+package state
+
+func lockTimestamp() int64 {
+ return int64(0)
+}
+
+func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) {
+}
diff --git a/overlord/state/state.go b/overlord/state/state.go
index 21e0f3cbeb6..f56ff27735d 100644
--- a/overlord/state/state.go
+++ b/overlord/state/state.go
@@ -111,6 +111,9 @@ type State struct {
// task/changes observing
taskHandlers map[int]func(t *Task, old, new Status) (remove bool)
changeHandlers map[int]func(chg *Change, old, new Status)
+
+ lockWaitStart int64
+ lockHoldStart int64
}
// New returns a new empty state.
@@ -139,8 +142,11 @@ func (s *State) Modified() bool {
// Lock acquires the state lock.
func (s *State) Lock() {
+ lockWait := lockTimestamp()
s.mu.Lock()
atomic.AddInt32(&s.muC, 1)
+ s.lockWaitStart = lockWait
+ s.lockHoldStart = lockTimestamp()
}
func (s *State) reading() {
@@ -158,7 +164,11 @@ func (s *State) writing() {
func (s *State) unlock() {
atomic.AddInt32(&s.muC, -1)
+ lockWaitStart, lockHoldStart := s.lockWaitStart, s.lockHoldStart
+ s.lockWaitStart, s.lockHoldStart = 0, 0
+ lockHoldEnd := lockTimestamp()
s.mu.Unlock()
+ maybeSaveLockTime(lockWaitStart, lockHoldStart, lockHoldEnd)
}
type marshalledState struct {
diff --git a/packaging/ubuntu-16.04/rules b/packaging/ubuntu-16.04/rules
index 8043be3a64b..d6e8f56790b 100755
--- a/packaging/ubuntu-16.04/rules
+++ b/packaging/ubuntu-16.04/rules
@@ -103,8 +103,8 @@ _SNAP_TAGS=
# check if we need to include the testkeys in the binary
ifneq (,$(filter testkeys,$(DEB_BUILD_OPTIONS)))
# if enabled also enable bootloader assets testing and fault injection
- _TAGS := withtestkeys,withbootassetstesting,faultinject
- _SNAP_TAGS := nomanagers,withtestkeys,faultinject
+ _TAGS := withtestkeys,withbootassetstesting,faultinject,statelocktrace
+ _SNAP_TAGS := nomanagers,withtestkeys,faultinject,statelocktrace
else
_SNAP_TAGS=nomanagers
endif
diff --git a/spread.yaml b/spread.yaml
index cc1e95265a5..8f2da5ba95a 100644
--- a/spread.yaml
+++ b/spread.yaml
@@ -84,6 +84,8 @@ environment:
SNAPD_DEB_FROM_REPO: '$(HOST: echo "${SPREAD_SNAPD_DEB_FROM_REPO:-true}")'
# Directory where the built snapd snaps and other assets are stored
SNAPD_WORK_DIR: '$(HOST: echo "${SPREAD_SNAPD_WORK_DIR:-/var/tmp/work-dir}")'
+ # Threshold used to determine which stake locks have to be collected during runtime
+ SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS: '$(HOST: echo "${SPREAD_SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS:-0}")'
# Directory where the nested images and test assets are stored
NESTED_WORK_DIR: '$(HOST: echo "${NESTED_WORK_DIR:-/var/tmp/work-dir}")'
diff --git a/tests/lib/prepare-restore.sh b/tests/lib/prepare-restore.sh
index 65cff46b36d..8c0dedd95e5 100755
--- a/tests/lib/prepare-restore.sh
+++ b/tests/lib/prepare-restore.sh
@@ -707,6 +707,8 @@ prepare_suite_each() {
fi
if [[ "$variant" = full ]]; then
+ # shellcheck source=tests/lib/prepare.sh
+ . "$TESTSLIB"/prepare.sh
# shellcheck source=tests/lib/prepare.sh
. "$TESTSLIB"/prepare.sh
if os.query is-classic; then
@@ -714,7 +716,7 @@ prepare_suite_each() {
else
prepare_each_core
fi
-
+ prepare_state_lock "$SPREAD_JOB"
fi
case "$SPREAD_SYSTEM" in
diff --git a/tests/lib/prepare.sh b/tests/lib/prepare.sh
index e114642a4be..4b58e299d72 100755
--- a/tests/lib/prepare.sh
+++ b/tests/lib/prepare.sh
@@ -462,6 +462,7 @@ prepare_classic() {
fi
prepare_reexec_override
+ prepare_state_lock "SNAPD PROJECT"
prepare_memory_limit_override
disable_refreshes
@@ -1524,6 +1525,39 @@ EOF
rm -rf "$UNPACK_DIR"
}
+prepare_state_lock(){
+ TAG=$1
+ CONF_FILE="/etc/systemd/system/snapd.service.d/state-lock.conf"
+ LOCKS_FILE="$TESTSTMP"/snapd_lock_traces
+ RESTART=false
+
+ if [ "$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" -gt 0 ]; then
+ echo "###START: $TAG" >> "$LOCKS_FILE"
+
+ # Generate the config file when it does not exist and when the threshold has changed different
+ if ! [ -f "$CONF_FILE" ] || ! grep -q "SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS=$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" < "$CONF_FILE"; then
+ echo "Prepare snapd for getting state lock time"
+ cat < "$CONF_FILE"
+[Service]
+Environment=SNAPPY_TESTING=1
+Environment=SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS="$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS"
+Environment=SNAPD_STATE_LOCK_TRACE_FILE="$LOCKS_FILE"
+EOF
+ RESTART=true
+ fi
+ elif [ -f "$CONF_FILE" ]; then
+ rm -f "$CONF_FILE"
+ RESTART=true
+ fi
+
+ if [ "$RESTART" = "true" ]; then
+ # the service setting may have changed in the service so we need
+ # to ensure snapd is reloaded
+ systemctl daemon-reload
+ systemctl restart snapd
+ fi
+}
+
# prepare_ubuntu_core will prepare ubuntu-core 16+
prepare_ubuntu_core() {
# we are still a "classic" image, prepare the surgery
@@ -1637,6 +1671,7 @@ prepare_ubuntu_core() {
# or restore will break
remove_disabled_snaps
prepare_memory_limit_override
+ prepare_state_lock "SNAPD PROJECT"
setup_experimental_features
systemctl stop snapd.service snapd.socket
save_snapd_state
diff --git a/tests/main/snapd-state-lock/task.yaml b/tests/main/snapd-state-lock/task.yaml
new file mode 100644
index 00000000000..9a76eff8b3c
--- /dev/null
+++ b/tests/main/snapd-state-lock/task.yaml
@@ -0,0 +1,16 @@
+summary: smoke test used to retrieve the lock state times
+
+details: |
+ Test used to collect artifacts
+
+priority: -1
+
+artifacts:
+ - snapd_lock_traces
+
+execute: |
+ if [ -f "$TESTSTMP"/snapd_lock_traces ]; then
+ cp -f "$TESTSTMP"/snapd_lock_traces .
+ else
+ touch snapd_lock_traces
+ fi
\ No newline at end of file