Skip to content

Commit

Permalink
feat(jobs): add configurable autoretry
Browse files: browse the repository at this point in the history
  • Loading branch information
pl-buiquang committed Jan 31, 2025
1 parent 86fea9f commit 08e3280
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 4 deletions.
2 changes: 2 additions & 0 deletions src/main/resources/application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ app {
jobs {
threads = 20
threads = ${?JOBS_THREADS}
autoRetry = 0
autoRetry = ${?JOBS_AUTO_RETRY}
}
back {
url: ${?DJANGO_CALLBACK_URL}
Expand Down
5 changes: 3 additions & 2 deletions src/main/scala/fr/aphp/id/eds/requester/AppConfig.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ case class PGConfig(
)

case class JobConfig(
threads: Int
threads: Int,
autoRetry: Int = 0
)

case class ServerConfig(
Expand Down Expand Up @@ -119,7 +120,7 @@ class AppConfig(conf: Config) {
))
} else { None }
val business: BusinessConfig = BusinessConfig(
JobConfig(conf.getInt("app.jobs.threads")),
JobConfig(conf.getInt("app.jobs.threads"), conf.getInt("app.jobs.autoRetry")),
conf.getInt("app.cohortCreationLimit"),
conf.getBoolean("app.enableCache"),
QueryConfig(
Expand Down
10 changes: 8 additions & 2 deletions src/main/scala/fr/aphp/id/eds/requester/jobs/JobManager.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ class JobManager() {
sparkSession.sparkContext.addFile(AppConfig.get.solr.get.authFile)
}

def execJob(jobExecutor: JobBase, jobData: SparkJobParameter): JobStatus = {
def execJob(jobExecutor: JobBase, jobData: SparkJobParameter, retry: Int = 0): JobStatus = {
val jobId = UUID.randomUUID().toString
logger.info(s"Starting new job ${jobId}")
val autoRetry = AppConfig.get.business.jobs.autoRetry
val jobExec = Future {
logger.info(s"Job ${jobId} started")
sparkSession.sparkContext.setJobGroup(jobId, s"new job ${jobId}", interruptOnCancel = true)
Expand All @@ -60,7 +61,12 @@ class JobManager() {
finalizeJob(jobId, Right(result), jobExecutor, jobData.mode, jobData)
case Failure(wrapped: Throwable) =>
logger.error(s"Job ${jobId} failed", wrapped)
finalizeJob(jobId, Left(wrapped), jobExecutor, jobData.mode, jobData)
if (retry < autoRetry) {
logger.info(s"Retrying job ${jobId}")
execJob(jobExecutor, jobData, retry + 1)
} else {
finalizeJob(jobId, Left(wrapped), jobExecutor, jobData.mode, jobData)
}
}
JobStatus(job.status,
job.jobId,
Expand Down
1 change: 1 addition & 0 deletions src/test/resources/application.test.conf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ app {
enableCache = false
jobs {
threads = 100
autoRetry = 0
}
back {
url = "http://django"
Expand Down

0 comments on commit 08e3280

Please sign in to comment.