Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

state store: fix logic for evaluating job status #24974

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions nomad/job_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,10 @@ func (j *Job) Revert(args *structs.JobRevertRequest, reply *structs.JobRegisterR
// Clear out the VersionTag to prevent tag duplication
revJob.VersionTag = nil

// Set the stable flag to false as this is functionally a new registration
// and should handle deployment updates
revJob.Stable = false

reg := &structs.JobRegisterRequest{
Job: revJob,
WriteRequest: args.WriteRequest,
Expand Down
54 changes: 25 additions & 29 deletions nomad/state/state_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -3716,6 +3716,7 @@ func (s *StateStore) DeleteEval(index uint64, evals, allocs []string, userInitia
}
}

// TODO: should we really be doing this here? We don't do it for filtered evals.
// Set the job's status
if err := s.setJobStatuses(index, txn, jobs, true); err != nil {
return fmt.Errorf("setting job status failed: %v", err)
Expand Down Expand Up @@ -4824,14 +4825,13 @@ func (s *StateStore) UpdateDeploymentStatus(msgType structs.MessageType, index u
return err
}

// Upsert the job if necessary
// On failed deployments with auto_revert set to true, a new eval and job will be included on the request.
// We should upsert them both
if req.Job != nil {
if err := s.upsertJobImpl(index, nil, req.Job, false, txn); err != nil {
return err
}
}

// Upsert the optional eval
if req.Eval != nil {
if err := s.nestedUpsertEval(txn, index, req.Eval); err != nil {
return err
Expand Down Expand Up @@ -5532,6 +5532,12 @@ func (s *StateStore) setJobStatus(index uint64, txn *txn,
if err := s.setJobSummary(txn, updated, index, oldStatus, newStatus); err != nil {
return fmt.Errorf("job summary update failed %w", err)
}

// Update the job version details
if err := s.upsertJobVersion(index, updated, txn); err != nil {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels odd to call here, but it's necessary to also update a job's version status anytime we are updating the job summary, or they can get out of sync.

return err
}

return nil
}

Expand Down Expand Up @@ -5599,14 +5605,16 @@ func (s *StateStore) setJobSummary(txn *txn, updated *structs.Job, index uint64,
}

func (s *StateStore) getJobStatus(txn *txn, job *structs.Job, evalDelete bool) (string, error) {
// If the job has been stopped, it's status is dead
if job.Stopped() {
return structs.JobStatusDead, nil
}

// System, Periodic and Parameterized jobs are running until explicitly
// stopped.
if job.Type == structs.JobTypeSystem ||
job.IsParameterized() ||
job.IsPeriodic() {
if job.Stop {
return structs.JobStatusDead, nil
}
return structs.JobStatusRunning, nil
}

Expand All @@ -5616,40 +5624,28 @@ func (s *StateStore) getJobStatus(txn *txn, job *structs.Job, evalDelete bool) (
}

// If there is a non-terminal allocation, the job is running.
hasAlloc := false
terminalAllocs := false
for alloc := allocs.Next(); alloc != nil; alloc = allocs.Next() {
hasAlloc = true
if !alloc.(*structs.Allocation).TerminalStatus() {
return structs.JobStatusRunning, nil
}
}

evals, err := txn.Get("evals", "job_prefix", job.Namespace, job.ID)
if err != nil {
return "", err
}
a := alloc.(*structs.Allocation)

hasEval := false
for raw := evals.Next(); raw != nil; raw = evals.Next() {
e := raw.(*structs.Evaluation)

// Filter non-exact matches
if e.JobID != job.ID {
if a.Job.Version < job.Version {
continue
}

hasEval = true
if !e.TerminalStatus() {
return structs.JobStatusPending, nil
if !a.TerminalStatus() {
return structs.JobStatusRunning, nil
}

terminalAllocs = true
}

// The job is dead if all the allocations and evals are terminal or if there
// are no evals because of garbage collection.
if evalDelete || hasEval || hasAlloc {
// The job is dead if all allocations for this version are terminal.
if terminalAllocs {
return structs.JobStatusDead, nil
}

// There are no allocs yet, which will happen for new job submissions,
// running new versions of a job, or reverting.
return structs.JobStatusPending, nil
}

Expand Down
Loading
Loading