Skip to content

Commit

Permalink
bmc log (#29)
Browse files Browse the repository at this point in the history
* bmc log

Signed-off-by: weizhoublue <[email protected]>

* typo

Signed-off-by: weizhoublue <[email protected]>

* typo

Signed-off-by: weizhoublue <[email protected]>

---------

Signed-off-by: weizhoublue <[email protected]>
  • Loading branch information
weizhoublue authored Jan 17, 2025
1 parent 6e43e25 commit be83a70
Show file tree
Hide file tree
Showing 14 changed files with 272 additions and 3 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v0.3.1
v0.4.0
4 changes: 2 additions & 2 deletions chart/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ name: bmc-operator
description: A Helm chart for BMC Operator

# This is the chart version, which will be taken from VERSION file
version: 0.3.1
version: 0.4.0

# This is the version number of the application being deployed, which will be taken from VERSION file
appVersion: "0.3.1"
appVersion: "0.4.0"

type: application

Expand Down
23 changes: 23 additions & 0 deletions chart/crds/bmc.spidernet.io_hoststatuses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,35 @@ spec:
type: object
lastUpdateTime:
type: string
log:
properties:
lastestLog:
properties:
message:
type: string
time:
type: string
required:
- message
- time
type: object
totalLogAccount:
format: int32
type: integer
warningLogAccount:
format: int32
type: integer
required:
- totalLogAccount
- warningLogAccount
type: object
required:
- basic
- clusterAgent
- healthy
- info
- lastUpdateTime
- log
type: object
type: object
served: true
Expand Down
7 changes: 7 additions & 0 deletions chart/templates/agent-templates.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ data:
value: {{ .Values.clusterAgent.feature.hostStatusUpdateInterval | quote }}
- name: LOG_LEVEL
value: {{ .Values.clusterAgent.feature.logLevel | quote }}
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
args:
- --metrics-bind-address=:8080
- --health-probe-bind-address=:8081
Expand Down Expand Up @@ -142,6 +146,9 @@ data:
rules:
- apiGroups: [""]
resources: ["pods", "services", "configmaps", "secrets"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["events"]
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
- apiGroups: ["apps"]
resources: ["deployments"]
Expand Down
22 changes: 22 additions & 0 deletions doc/usage/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,25 @@ test-hostendpoint bmc-clusteragent true 192.168.0.50 ho
1. 进入 agent pod 中,查看 DHCP server 的实时 IP 分配文件 `/var/lib/dhcp/bmc-clusteragent-dhcpd.leases`,确认和删除其中期望解除绑定的 IP 地址
2. `kubectl get hoststatus -l status.basic.ipAddr=<IP>` 查看 hoststatus 对象,确认其中的 IP 和 MAC 地址符合删除预期,然后手动删除对应的 hoststatus 对象 `kubectl delete hoststatus -l status.basic.ipAddr=192.168.0.101`
3. 后端会自动更新 DHCP server 的配置,实现 IP 和 MAC 地址的解绑(可进入 agent pod 中,查看文件 `/etc/dhcp/dhcpd.conf` 确认)

3. 查看 BMC 主机的日志

```bash
# 获取所有 BMC 主机的日志
kubectl get events -n bmc --field-selector reason=BMCLogEntry

# 获取指定 BMC 主机的日志
kubectl get events -n bmc --field-selector reason=BMCLogEntry,involvedObject.name=${HoststatusName}

# 获取指定 BMC 主机的日志统计
kubectl get hoststatus ${HoststatusName} -n bmc -o jsonpath='{.status.log}' | jq .
{
"lastestLog": {
"message": "[][2018-08-31T13:33:54+00:00][]: [ PS1 Status ] Power Supply Failure",
"time": "2018-08-31T13:33:54+00:00"
},
"totalLogAccount": 67,
"warningLogAccount": 67
}

```
5 changes: 5 additions & 0 deletions pkg/agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ type AgentConfig struct {
Password string
// 主机状态更新间隔(秒)
HostStatusUpdateInterval int
// PodNamespace is the namespace the agent pod runs in, read from the POD_NAMESPACE environment variable
PodNamespace string
}

// ValidateEndpointConfig validates the endpoint configuration
Expand Down Expand Up @@ -175,6 +177,8 @@ func LoadAgentConfig(k8sClient *kubernetes.Clientset) (*AgentConfig, error) {
return nil, fmt.Errorf("CLUSTERAGENT_NAME environment variable not set")
}

ns := os.Getenv("POD_NAMESPACE")

updateInterval := 60 // 默认 60 秒
intervalStr := os.Getenv("HOST_STATUS_UPDATE_INTERVAL")
if intervalStr == "" {
Expand Down Expand Up @@ -225,6 +229,7 @@ func LoadAgentConfig(k8sClient *kubernetes.Clientset) (*AgentConfig, error) {
ClusterAgentName: agentName,
AgentObjSpec: clusterAgent.Spec,
HostStatusUpdateInterval: updateInterval,
PodNamespace: ns,
}

// Validate endpoint configuration
Expand Down
5 changes: 5 additions & 0 deletions pkg/agent/hostendpoint/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ func (r *HostEndpointReconciler) handleHostEndpoint(ctx context.Context, hostEnd
Port: *hostEndpoint.Spec.Port,
},
Info: map[string]string{},
Log: bmcv1beta1.LogStruct{
TotalLogAccount: 0,
WarningLogAccount: 0,
LastestLog: nil,
},
}

if err := r.client.Status().Update(ctx, hostStatus); err != nil {
Expand Down
79 changes: 79 additions & 0 deletions pkg/agent/hoststatus/HostStatusReconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ import (
//"github.com/spidernet-io/bmc/pkg/lock"
"github.com/spidernet-io/bmc/pkg/log"
"github.com/spidernet-io/bmc/pkg/redfish"

gofishredfish "github.com/stmcginnis/gofish/redfish"

"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -24,6 +28,58 @@ var hostStatusLock = &sync.Mutex{}

// ------------------------------ update the status.info of the hoststatus

// GenerateEvents turns Redfish log entries into Kubernetes events attached to
// the named HostStatus and summarises the log.
//
// logEntrys is expected to be ordered newest first: the first entry is the one
// reported as the latest log. lastLogTime is the Created timestamp of the
// latest entry recorded by a previous call, or "" when nothing has been
// recorded yet.
//
// An event with reason "BMCLogEntry" is emitted for every entry that precedes
// the one whose Created timestamp equals lastLogTime. That entry and everything
// after it were already reported by an earlier call, so they are only counted,
// not emitted again. If no entry matches lastLogTime, every entry is new.
//
// Returns:
//   - newLastestTime, newLastestMsg: timestamp and formatted message of the
//     newest entry when it is new; both are "" when there is nothing newer
//     than lastLogTime.
//   - totalMsgCount: number of entries in logEntrys.
//   - warningMsgCount: number of entries whose severity is neither OK nor empty.
//   - newLogAccount: number of entries for which an event was emitted.
func (c *hostStatusController) GenerateEvents(logEntrys []*gofishredfish.LogEntry, hostStatusName string, lastLogTime string) (newLastestTime, newLastestMsg string, totalMsgCount, warningMsgCount, newLogAccount int) {
	if len(logEntrys) == 0 {
		return "", "", 0, 0, 0
	}

	totalMsgCount = len(logEntrys)

	// Every event refers to the same HostStatus object, so build the
	// reference once instead of once per entry.
	target := &corev1.ObjectReference{
		Kind:       bmcv1beta1.KindHostStatus,
		Name:       hostStatusName,
		Namespace:  c.config.PodNamespace,
		APIVersion: bmcv1beta1.APIVersion,
	}

	newest := true       // true until the first usable entry has been examined
	reachedLast := false // true once the previously recorded entry has been seen
	for _, entry := range logEntrys {
		if entry == nil {
			// Defensive: a nil entry carries no information and would panic below.
			continue
		}
		isNewest := newest
		newest = false

		msg := fmt.Sprintf("[%s][%s]: %s %s", entry.Created, entry.Severity, entry.OemSensorType, entry.Message)

		// Anything that reports a severity other than OK is surfaced as a warning.
		ty := corev1.EventTypeNormal
		if entry.Severity != gofishredfish.OKEventSeverity && entry.Severity != "" {
			ty = corev1.EventTypeWarning
			warningMsgCount++
		}

		// With newest-first ordering, the entry recorded last time marks the
		// boundary: it and everything after it have already produced events.
		if lastLogTime != "" && entry.Created == lastLogTime {
			reachedLast = true
		}
		if reachedLast || entry.Created == lastLogTime {
			continue
		}

		// A new log entry: count it and emit an event for it.
		newLogAccount++
		log.Logger.Infof("find new log for hostStatus %s: %s", hostStatusName, msg)

		// Only the newest entry can become the new "latest log".
		if isNewest {
			newLastestTime = entry.Created
			newLastestMsg = msg
		}

		c.recorder.Event(target, ty, "BMCLogEntry", msg)
	}

	return newLastestTime, newLastestMsg, totalMsgCount, warningMsgCount, newLogAccount
}

// this is called by UpdateHostStatusAtInterval and UpdateHostStatusWrapper
func (c *hostStatusController) UpdateHostStatusInfo(name string, d *hoststatusdata.HostConnectCon) (bool, error) {

Expand Down Expand Up @@ -79,6 +135,29 @@ func (c *hostStatusController) UpdateHostStatusInfo(name string, d *hoststatusda
log.Logger.Infof("HostStatus %s change from %v to %v , update status", name, existing.Status.Healthy, healthy)
}

// 获取日志
if healthy {
logEntrys, err := client.GetLog()
if err != nil {
log.Logger.Errorf("Failed to get logs of HostStatus %s: %v", name, err)
} else {
lastLogTime := ""
if updated.Status.Log.LastestLog != nil {
lastLogTime = updated.Status.Log.LastestLog.Time
}
newLastestTime, newLastestMsg, totalMsgCount, warningMsgCount, newLogAccount := c.GenerateEvents(logEntrys, name, lastLogTime)
if newLastestTime != "" {
updated.Status.Log.TotalLogAccount = int32(totalMsgCount)
updated.Status.Log.WarningLogAccount = int32(warningMsgCount)
updated.Status.Log.LastestLog = &bmcv1beta1.LogEntry{
Time: newLastestTime,
Message: newLastestMsg,
}
log.Logger.Infof("find %d new logs for hostStatus %s", newLogAccount, name)
}
}
}

// 更新 HostStatus
if !compareHostStatus(updated.Status, existing.Status, log.Logger) {
log.Logger.Debugf("status changed, existing: %v, updated: %v", existing.Status, updated.Status)
Expand Down
5 changes: 5 additions & 0 deletions pkg/agent/hoststatus/dhcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ func (c *hostStatusController) handleDHCPAdd(client dhcptypes.ClientInfo) error
ActiveDhcpClient: true,
},
Info: map[string]string{},
Log: bmcv1beta1.LogStruct{
TotalLogAccount: 0,
WarningLogAccount: 0,
LastestLog: nil,
},
}
if c.config.AgentObjSpec.Endpoint.SecretName != "" {
hostStatus.Status.Basic.SecretName = c.config.AgentObjSpec.Endpoint.SecretName
Expand Down
11 changes: 11 additions & 0 deletions pkg/agent/hoststatus/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"sync"

"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/record"
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
corev1 "k8s.io/api/core/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

Expand All @@ -29,17 +32,25 @@ type hostStatusController struct {
deleteChan chan types.ClientInfo
stopCh chan struct{}
wg sync.WaitGroup
recorder record.EventRecorder
}

func NewHostStatusController(kubeClient kubernetes.Interface, config *config.AgentConfig, mgr ctrl.Manager) HostStatusController {
log.Logger.Debugf("Creating new HostStatus controller for cluster agent: %s", config.ClusterAgentName)

// Create event recorder
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
recorder := eventBroadcaster.NewRecorder(mgr.GetScheme(), corev1.EventSource{Component: "bmc-controller"})

controller := &hostStatusController{
client: mgr.GetClient(),
kubeClient: kubeClient,
config: config,
addChan: make(chan types.ClientInfo),
deleteChan: make(chan types.ClientInfo),
stopCh: make(chan struct{}),
recorder: recorder,
}

log.Logger.Debugf("HostStatus controller created successfully")
Expand Down
14 changes: 14 additions & 0 deletions pkg/k8s/apis/bmc.spidernet.io/v1beta1/hoststatus_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,20 @@ type HostStatusStatus struct {
LastUpdateTime string `json:"lastUpdateTime"`
Basic BasicInfo `json:"basic"`
Info map[string]string `json:"info"`
Log LogStruct `json:"log"`
}

// LogStruct summarises the BMC log of a host inside the HostStatus status.
type LogStruct struct {
// TotalLogAccount is the number of log entries read from the BMC when the
// summary was last refreshed.
// +kubebuilder:validation:Required
TotalLogAccount int32 `json:"totalLogAccount"`
// WarningLogAccount is the number of those entries whose severity is
// neither OK nor empty.
WarningLogAccount int32 `json:"warningLogAccount"`
// LastestLog is the most recent log entry recorded for the host; it is nil
// until a log entry has been recorded.
// +optional
LastestLog *LogEntry `json:"lastestLog,omitempty"`
}

// LogEntry is a single BMC log record kept in the HostStatus status.
type LogEntry struct {
// Time is the creation timestamp of the log entry as reported by the BMC.
Time string `json:"time"`
// Message is the formatted log text (timestamp, severity, sensor type and message).
Message string `json:"message"`
}

type BasicInfo struct {
Expand Down
36 changes: 36 additions & 0 deletions pkg/k8s/apis/bmc.spidernet.io/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pkg/redfish/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package redfish

import (
"fmt"
"github.com/stmcginnis/gofish/redfish"
"reflect"

"github.com/spidernet-io/bmc/pkg/agent/hoststatus/data"
Expand All @@ -13,6 +14,7 @@ import (
type RefishClient interface {
// Power performs a power operation on the host.
// NOTE(review): the string argument presumably names the power action — confirm against the implementation.
Power(string) error
// GetInfo returns information about the host as string key/value pairs.
GetInfo() (map[string]string, error)
// GetLog returns the log entries read from the host's BMC.
GetLog() ([]*redfish.LogEntry, error)
}

// redfishClient 实现了 Client 接口
Expand Down
Loading

0 comments on commit be83a70

Please sign in to comment.