Skip to content

Commit ae28538

Browse files
sshaderConvex, Inc.
authored and
Convex, Inc.
committed
Make knobs for scheduled job backoff (#27280)
...also default from 5s max to 30s max. GitOrigin-RevId: 071d25eed190f5408e5d81d0886c4e555f269874
1 parent 1159d62 commit ae28538

File tree

2 files changed

+29
-7
lines changed

2 files changed

+29
-7
lines changed

crates/application/src/scheduled_jobs/mod.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ use common::{
2020
knobs::{
2121
SCHEDULED_JOB_EXECUTION_PARALLELISM,
2222
SCHEDULED_JOB_GARBAGE_COLLECTION_BATCH_SIZE,
23+
SCHEDULED_JOB_GARBAGE_COLLECTION_INITIAL_BACKOFF,
24+
SCHEDULED_JOB_GARBAGE_COLLECTION_MAX_BACKOFF,
25+
SCHEDULED_JOB_INITIAL_BACKOFF,
26+
SCHEDULED_JOB_MAX_BACKOFF,
2327
SCHEDULED_JOB_RETENTION,
2428
UDF_EXECUTOR_OCC_MAX_RETRIES,
2529
},
@@ -147,9 +151,6 @@ impl<RT: Runtime> ScheduledJobRunner<RT> {
147151
}
148152
}
149153

150-
const INITIAL_BACKOFF: Duration = Duration::from_millis(10);
151-
const MAX_BACKOFF: Duration = Duration::from_secs(5);
152-
153154
pub struct ScheduledJobExecutor<RT: Runtime> {
154155
context: ScheduledJobContext<RT>,
155156
pause_client: PauseClient,
@@ -195,7 +196,8 @@ impl<RT: Runtime> ScheduledJobExecutor<RT> {
195196
pause_client,
196197
};
197198
async move {
198-
let mut backoff = Backoff::new(INITIAL_BACKOFF, MAX_BACKOFF);
199+
let mut backoff =
200+
Backoff::new(*SCHEDULED_JOB_INITIAL_BACKOFF, *SCHEDULED_JOB_MAX_BACKOFF);
199201
while let Err(mut e) = executor.run(&mut backoff).await {
200202
let delay = executor.rt.with_rng(|rng| backoff.fail(rng));
201203
tracing::error!("Scheduled job executor failed, sleeping {delay:?}");
@@ -417,7 +419,7 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
417419
job: ScheduledJob,
418420
job_id: ResolvedDocumentId,
419421
) -> ResolvedDocumentId {
420-
let mut backoff = Backoff::new(INITIAL_BACKOFF, MAX_BACKOFF);
422+
let mut backoff = Backoff::new(*SCHEDULED_JOB_INITIAL_BACKOFF, *SCHEDULED_JOB_MAX_BACKOFF);
421423
loop {
422424
// Generate a new request_id for every schedule job execution attempt.
423425
let request_id = RequestId::new();
@@ -731,7 +733,8 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
731733
// Mark the job as completed. Keep trying until we succeed (or
732734
// detect the job state has changed). Don't bubble up the error
733735
// since otherwise we will lose the original execution logs.
734-
let mut backoff = Backoff::new(INITIAL_BACKOFF, MAX_BACKOFF);
736+
let mut backoff =
737+
Backoff::new(*SCHEDULED_JOB_INITIAL_BACKOFF, *SCHEDULED_JOB_MAX_BACKOFF);
735738
while let Err(mut err) = self
736739
.complete_action(job_id, &updated_job, usage_tracker.clone(), state.clone())
737740
.await
@@ -844,7 +847,10 @@ impl<RT: Runtime> ScheduledJobGarbageCollector<RT> {
844847
pub fn start(rt: RT, database: Database<RT>) -> impl Future<Output = ()> + Send {
845848
let garbage_collector = Self { rt, database };
846849
async move {
847-
let mut backoff = Backoff::new(INITIAL_BACKOFF, MAX_BACKOFF);
850+
let mut backoff = Backoff::new(
851+
*SCHEDULED_JOB_GARBAGE_COLLECTION_INITIAL_BACKOFF,
852+
*SCHEDULED_JOB_GARBAGE_COLLECTION_MAX_BACKOFF,
853+
);
848854
while let Err(mut e) = garbage_collector.run(&mut backoff).await {
849855
let delay = garbage_collector.rt.with_rng(|rng| backoff.fail(rng));
850856
tracing::error!("Scheduled job garbage collector failed, sleeping {delay:?}");

crates/common/src/knobs.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,22 @@ pub static TRANSACTION_MAX_SCHEDULED_TOTAL_ARGUMENT_SIZE_BYTES: LazyLock<usize>
240240
pub static SCHEDULED_JOB_EXECUTION_PARALLELISM: LazyLock<usize> =
241241
LazyLock::new(|| env_config("SCHEDULED_JOB_EXECUTION_PARALLELISM", 10));
242242

243+
/// Initial backoff in milliseconds on a system error from a scheduled job.
244+
pub static SCHEDULED_JOB_INITIAL_BACKOFF: LazyLock<Duration> =
245+
LazyLock::new(|| Duration::from_millis(env_config("SCHEDULED_JOB_INITIAL_BACKOFF_MS", 10)));
246+
247+
/// Max backoff in seconds on a system error from a scheduled job.
248+
pub static SCHEDULED_JOB_MAX_BACKOFF: LazyLock<Duration> =
249+
LazyLock::new(|| Duration::from_secs(env_config("SCHEDULED_JOB_MAX_BACKOFF_SECS", 30)));
250+
251+
/// Initial backoff in milliseconds on a system error from a scheduled job.
252+
pub static SCHEDULED_JOB_GARBAGE_COLLECTION_INITIAL_BACKOFF: LazyLock<Duration> =
253+
LazyLock::new(|| Duration::from_millis(env_config("SCHEDULED_JOB_INITIAL_BACKOFF_MS", 10)));
254+
255+
/// Max backoff in seconds on a system error from a scheduled job.
256+
pub static SCHEDULED_JOB_GARBAGE_COLLECTION_MAX_BACKOFF: LazyLock<Duration> =
257+
LazyLock::new(|| Duration::from_secs(env_config("SCHEDULED_JOB_MAX_BACKOFF_SECS", 30)));
258+
243259
/// How long completed scheduled jobs are kept before getting garbage collected.
244260
pub static SCHEDULED_JOB_RETENTION: LazyLock<Duration> = LazyLock::new(|| {
245261
Duration::from_secs(env_config(

0 commit comments

Comments
 (0)