You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
The node-feature-discovery worker deployed by nvidia.com/gpu-operator fails to start; its log is shown below.
The key log line is: "failed to read cpufreq directory" err="open /host-sys/devices/system/cpu/cpufreq: no such file or directory"
[root@master sys]# kubectl logs pod/gpu-operator-1732781733-node-feature-discovery-worker-sdnsn | head
I1128 08:15:36.238949 1 nfd-worker.go:293] "Node Feature Discovery Worker" version="v0.16.6" nodeName="node" namespace="gpu-operator"
I1128 08:15:36.240262 1 nfd-worker.go:622] "configuration file parsed" path="/etc/kubernetes/node-feature-discovery/nfd-worker.conf"
I1128 08:15:36.240636 1 nfd-worker.go:654] "configuration successfully updated" configuration={"Core":{"Klog":{},"LabelWhiteList":"","NoPublish":false,"FeatureSources":["all"],"Sources":null,"LabelSources":["all"],"SleepInterval":{"Duration":60000000000}},"Sources":{"cpu":{"cpuid":{"attributeBlacklist":["AVX10","BMI1","BMI2","CLMUL","CMOV","CX16","ERMS","F16C","HTT","LZCNT","MMX","MMXEXT","NX","POPCNT","RDRAND","RDSEED","RDTSCP","SGX","SGXLC","SSE","SSE2","SSE3","SSE4","SSE42","SSSE3","TDX_GUEST"]}},"custom":[],"fake":{"labels":{"fakefeature1":"true","fakefeature2":"true","fakefeature3":"true"},"flagFeatures":["flag_1","flag_2","flag_3"],"attributeFeatures":{"attr_1":"true","attr_2":"false","attr_3":"10"},"instanceFeatures":[{"attr_1":"true","attr_2":"false","attr_3":"10","attr_4":"foobar","name":"instance_1"},{"attr_1":"true","attr_2":"true","attr_3":"100","name":"instance_2"},{"name":"instance_3"}]},"kernel":{"KconfigFile":"","configOpts":["NO_HZ","NO_HZ_IDLE","NO_HZ_FULL","PREEMPT"]},"local":{},"pci":{"deviceClassWhitelist":["02","0200","0207","0300","0302"],"deviceLabelFields":["vendor"]},"usb":{"deviceClassWhitelist":["0e","ef","fe","ff"],"deviceLabelFields":["class","vendor","device"]}}}
I1128 08:15:36.262017 1 metrics.go:44] "metrics server starting" port=":8081"
E1128 08:15:36.262249 1 pstate_amd64.go:75] "failed to read cpufreq directory" err="open /host-sys/devices/system/cpu/cpufreq: no such file or directory"
[root@master sys]# !1203
ll devices/system/cpu
total 0
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu0
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu1
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu10
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu11
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu12
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu13
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu14
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu15
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu16
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu17
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu18
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu19
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu2
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu20
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu21
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu22
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu23
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu24
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu25
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu26
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu27
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu28
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu29
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu3
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu30
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu31
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu4
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu5
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu6
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu7
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu8
drwxr-xr-x. 9 root root 0 Nov 28 22:12 cpu9
drwxr-xr-x. 2 root root 0 Nov 28 22:12 cpuidle
drwxr-xr-x. 2 root root 0 Nov 28 22:12 intel_pstate
-r--r--r--. 1 root root 4096 Nov 28 22:12 isolated
-r--r--r--. 1 root root 4096 Nov 28 22:12 kernel_max
drwxr-xr-x. 2 root root 0 Nov 28 22:12 microcode
-r--r--r--. 1 root root 4096 Nov 28 22:12 modalias
-r--r--r--. 1 root root 4096 Nov 28 22:12 nohz_full
-r--r--r--. 1 root root 4096 Nov 28 22:12 offline
-r--r--r--. 1 root root 4096 Nov 14 21:22 online
-r--r--r--. 1 root root 4096 Nov 28 22:12 possible
drwxr-xr-x. 2 root root 0 Nov 28 22:12 power
-r--r--r--. 1 root root 4096 Nov 28 22:12 present
drwxr-xr-x. 2 root root 0 Nov 28 22:12 smt
-rw-r--r--. 1 root root 4096 Nov 28 22:12 uevent
drwxr-xr-x. 2 root root 0 Nov 28 22:12 vulnerabilities
[root@master sys]# ll devices/system/cpu/cpu0/
total 0
drwxr-xr-x. 6 root root 0 Nov 16 20:13 cache
drwxr-xr-x. 2 root root 0 Nov 16 20:12 cpufreq
drwxr-xr-x. 7 root root 0 Nov 28 22:23 cpuidle
-r--------. 1 root root 4096 Nov 28 22:23 crash_notes
-r--------. 1 root root 4096 Nov 28 22:23 crash_notes_size
lrwxrwxrwx. 1 root root 0 Nov 28 22:23 driver -> ../../../../bus/cpu/drivers/processor
lrwxrwxrwx. 1 root root 0 Nov 28 22:23 firmware_node -> ../../../LNXSYSTM:00/device:00/ACPI0004:00/LNXCPU:00
drwxr-xr-x. 2 root root 0 Nov 28 22:23 microcode
lrwxrwxrwx. 1 root root 0 Nov 28 22:23 node0 -> ../../node/node0
-rw-r--r--. 1 root root 4096 Nov 28 22:23 online
drwxr-xr-x. 2 root root 0 Nov 28 22:23 power
lrwxrwxrwx. 1 root root 0 Nov 28 22:23 subsystem -> ../../../../bus/cpu
drwxr-xr-x. 2 root root 0 Nov 28 22:23 thermal_throttle
drwxr-xr-x. 2 root root 0 Nov 14 21:22 topology
-rw-r--r--. 1 root root 4096 Nov 28 22:23 uevent
I found the following code and think it may be related, although I am not sure whether it is correct.
path: source/cpu/pstate_amd64.go
// Discover p-state related features such as turbo boost.
func detectPstate() (map[string]string, error) {
// Check that sysfs is available
sysfsBase := hostpath.SysfsDir.Path("devices/system/cpu")
if _, err := os.Stat(sysfsBase); err != nil {
return nil, err
}
// some code .... ignore
// Determine scaling governor that is being used
cpufreqDir := filepath.Join(sysfsBase, "cpufreq")
policies, err := os.ReadDir(cpufreqDir)
if err != nil {
klog.ErrorS(err, "failed to read cpufreq directory")
return features, nil
}
What you expected to happen:
Compatibility with multi-core CPUs.
How to reproduce it (as minimally and precisely as possible):
It should be reproducible in an environment configured like the one described below.
Anything else we need to know?:
Environment:
Kubernetes version (use kubectl version): 1.23.6-0.x86_64
What happened:
The node-feature-discovery worker deployed by nvidia.com/gpu-operator fails to start. The key line in its log is:
"failed to read cpufreq directory" err="open /host-sys/devices/system/cpu/cpufreq: no such file or directory"
K8s config is:
But the path on my machine is like this:
I found the following code and think it may be related, although I am not sure whether it is correct.
path:
source/cpu/pstate_amd64.go
What you expected to happen:
Compatibility with multi-core CPUs.
How to reproduce it (as minimally and precisely as possible):
According to the environment configuration, it is necessary to present it
Anything else we need to know?:
Environment:
Kubernetes version (use `kubectl version`): 1.23.6-0.x86_64
OS (e.g. `cat /etc/os-release`): CentOS 7.9
Kernel (e.g. `uname -a`): Linux master 3.10.0-1160.el7.x86_64
The text was updated successfully, but these errors were encountered: