Skip to content

Commit d7d174f

Browse files
ldanilekConvex, Inc.
authored and
Convex, Inc.
committed
[components] namespace scheduled jobs (#27231)
Get the scheduler working within components. This is the basic functionality, where a component schedules its own function and that function runs. A lot of the scaffolding is there for the rest of the features (like the virtual table, cancellation, and cleanup worker), but we can address and test those later. GitOrigin-RevId: 936820825bb6346aac8bf695c845342cb846cfc6
1 parent f9f10de commit d7d174f

File tree

10 files changed

+192
-89
lines changed

10 files changed

+192
-89
lines changed

crates/application/src/application_function_runner/mod.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ use usage_tracking::{
187187
use value::{
188188
heap_size::HeapSize,
189189
id_v6::DeveloperDocumentId,
190+
TableNamespace,
190191
};
191192
use vector::{
192193
PublicVectorSearchQueryResult,
@@ -2162,8 +2163,11 @@ impl<RT: Runtime> ActionCallbacks for ApplicationFunctionRunner<RT> {
21622163
tx,
21632164
)
21642165
.await?;
2165-
let virtual_id = VirtualSchedulerModel::new(tx)
2166-
.schedule(path, udf_args, scheduled_ts, context)
2166+
let (_, component) = BootstrapComponentsModel::new(tx)
2167+
.component_path_to_ids(path.component.clone())
2168+
.await?;
2169+
let virtual_id = VirtualSchedulerModel::new(tx, component.into())
2170+
.schedule(path.udf_path, udf_args, scheduled_ts, context)
21672171
.await?;
21682172
Ok(virtual_id)
21692173
}
@@ -2185,7 +2189,14 @@ impl<RT: Runtime> ActionCallbacks for ApplicationFunctionRunner<RT> {
21852189
FunctionUsageTracker::new(),
21862190
PauseClient::new(),
21872191
"app_funrun_cancel_job",
2188-
|tx| async { VirtualSchedulerModel::new(tx).cancel(virtual_id).await }.into(),
2192+
|tx| {
2193+
async {
2194+
VirtualSchedulerModel::new(tx, TableNamespace::by_component_TODO())
2195+
.cancel(virtual_id)
2196+
.await
2197+
}
2198+
.into()
2199+
},
21892200
)
21902201
.await?;
21912202
Ok(())

crates/application/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2402,7 +2402,9 @@ impl<RT: Runtime> Application<RT> {
24022402
path: Option<CanonicalizedComponentFunctionPath>,
24032403
max_jobs: usize,
24042404
) -> anyhow::Result<(usize, Vec<DeploymentAuditLogEvent>)> {
2405-
let count = SchedulerModel::new(tx).cancel_all(path, max_jobs).await?;
2405+
let count = SchedulerModel::new(tx, TableNamespace::by_component_TODO())
2406+
.cancel_all(path, max_jobs)
2407+
.await?;
24062408
Ok((count, vec![]))
24072409
}
24082410

crates/application/src/scheduled_jobs/mod.rs

Lines changed: 78 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,7 @@ use std::{
1010

1111
use common::{
1212
backoff::Backoff,
13-
components::{
14-
CanonicalizedComponentFunctionPath,
15-
ComponentPath,
16-
},
13+
components::CanonicalizedComponentFunctionPath,
1714
document::ParsedDocument,
1815
errors::{
1916
report_error,
@@ -61,14 +58,17 @@ use futures::{
6158
select_biased,
6259
Future,
6360
FutureExt,
61+
TryStreamExt,
6462
};
63+
use futures_async_stream::try_stream;
6564
use keybroker::Identity;
6665
use minitrace::future::FutureExt as _;
6766
use model::{
6867
backend_state::{
6968
types::BackendState,
7069
BackendStateModel,
7170
},
71+
components::ComponentsModel,
7272
modules::ModuleModel,
7373
scheduled_jobs::{
7474
types::{
@@ -80,6 +80,7 @@ use model::{
8080
NEXT_TS_FIELD,
8181
SCHEDULED_JOBS_INDEX,
8282
SCHEDULED_JOBS_INDEX_BY_COMPLETED_TS,
83+
SCHEDULED_JOBS_TABLE,
8384
},
8485
};
8586
use parking_lot::Mutex;
@@ -321,18 +322,8 @@ impl<RT: Runtime> ScheduledJobExecutor<RT> {
321322
job_finished_tx: &mpsc::Sender<ResolvedDocumentId>,
322323
) -> anyhow::Result<Option<Timestamp>> {
323324
let now = self.rt.generate_timestamp()?;
324-
let index_query = Query::index_range(IndexRange {
325-
index_name: SCHEDULED_JOBS_INDEX.clone(),
326-
range: vec![IndexRangeExpression::Gt(
327-
NEXT_TS_FIELD.clone(),
328-
value::ConvexValue::Null,
329-
)],
330-
order: Order::Asc,
331-
});
332-
let mut query_stream =
333-
ResolvedQuery::new(tx, TableNamespace::by_component_TODO(), index_query)?;
334-
while let Some(doc) = query_stream.next(tx, None).await? {
335-
let job: ParsedDocument<ScheduledJob> = doc.try_into()?;
325+
let mut job_stream = self.stream_jobs_to_run(tx);
326+
while let Some(job) = job_stream.try_next().await? {
336327
let (job_id, job) = job.clone().into_id_and_value();
337328
if running_job_ids.contains(&job_id) {
338329
continue;
@@ -373,6 +364,49 @@ impl<RT: Runtime> ScheduledJobExecutor<RT> {
373364
}
374365
Ok(None)
375366
}
367+
368+
#[try_stream(boxed, ok = ParsedDocument<ScheduledJob>, error = anyhow::Error)]
369+
async fn stream_jobs_to_run<'a>(&'a self, tx: &'a mut Transaction<RT>) {
370+
let namespaces: Vec<_> = tx
371+
.table_mapping()
372+
.iter()
373+
.filter(|(_, _, _, name)| **name == *SCHEDULED_JOBS_TABLE)
374+
.map(|(_, namespace, ..)| namespace)
375+
.collect();
376+
let index_query = Query::index_range(IndexRange {
377+
index_name: SCHEDULED_JOBS_INDEX.clone(),
378+
range: vec![IndexRangeExpression::Gt(
379+
NEXT_TS_FIELD.clone(),
380+
value::ConvexValue::Null,
381+
)],
382+
order: Order::Asc,
383+
});
384+
// Key is (next_ts, namespace), where next_ts is for sorting and namespace
385+
// is for deduping.
386+
// Value is (job, query) where job is the job to run and query will get
387+
// the next job to run in that namespace.
388+
let mut queries = BTreeMap::new();
389+
for namespace in namespaces {
390+
let mut query = ResolvedQuery::new(tx, namespace, index_query.clone())?;
391+
if let Some(doc) = query.next(tx, None).await? {
392+
let job: ParsedDocument<ScheduledJob> = doc.try_into()?;
393+
let next_ts = job.next_ts.ok_or_else(|| {
394+
anyhow::anyhow!("Could not get next_ts to run scheduled job {}", job.id())
395+
})?;
396+
queries.insert((next_ts, namespace), (job, query));
397+
}
398+
}
399+
while let Some(((_min_next_ts, namespace), (min_job, mut query))) = queries.pop_first() {
400+
yield min_job;
401+
if let Some(doc) = query.next(tx, None).await? {
402+
let job: ParsedDocument<ScheduledJob> = doc.try_into()?;
403+
let next_ts = job.next_ts.ok_or_else(|| {
404+
anyhow::anyhow!("Could not get next_ts to run scheduled job {}", job.id())
405+
})?;
406+
queries.insert((next_ts, namespace), (job, query));
407+
}
408+
}
409+
}
376410
}
377411

378412
impl<RT: Runtime> ScheduledJobContext<RT> {
@@ -424,14 +458,18 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
424458

425459
tracing::info!("Executing {:?}!", job.udf_path);
426460
let identity = tx.inert_identity();
461+
let namespace = tx.table_mapping().tablet_namespace(job_id.tablet_id)?;
462+
let component_path = ComponentsModel::new(&mut tx)
463+
.get_component_path_for_namespace(namespace)
464+
.await?;
427465

428466
// Since we don't specify the function type when we schedule, we have to
429467
// use the analyzed result.
430468
let caller = FunctionCaller::Scheduler {
431469
job_id: job_id.into(),
432470
};
433471
let path = CanonicalizedComponentFunctionPath {
434-
component: ComponentPath::root(),
472+
component: component_path,
435473
udf_path: job.udf_path.clone(),
436474
};
437475
let udf_type = match ModuleModel::new(&mut tx)
@@ -440,7 +478,7 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
440478
{
441479
Ok(analyzed_function) => analyzed_function.udf_type,
442480
Err(error) => {
443-
SchedulerModel::new(&mut tx)
481+
SchedulerModel::new(&mut tx, namespace)
444482
.complete(
445483
job_id,
446484
ScheduledJobState::Failed(error.user_facing_message()),
@@ -487,7 +525,7 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
487525
UdfType::Mutation,
488526
UdfType::Action,
489527
);
490-
SchedulerModel::new(&mut tx)
528+
SchedulerModel::new(&mut tx, namespace)
491529
.complete(job_id, ScheduledJobState::Failed(message.clone()))
492530
.await?;
493531
self.database
@@ -545,16 +583,20 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
545583
&self,
546584
request_id: RequestId,
547585
caller: FunctionCaller,
548-
tx: Transaction<RT>,
586+
mut tx: Transaction<RT>,
549587
job: ScheduledJob,
550588
job_id: ResolvedDocumentId,
551589
usage_tracker: FunctionUsageTracker,
552590
) -> anyhow::Result<()> {
553591
let start = self.rt.monotonic_now();
554592
let context = ExecutionContext::new(request_id, &caller);
555593
let identity = tx.inert_identity();
594+
let namespace = tx.table_mapping().tablet_namespace(job_id.tablet_id)?;
595+
let component_path = ComponentsModel::new(&mut tx)
596+
.get_component_path_for_namespace(namespace)
597+
.await?;
556598
let path = CanonicalizedComponentFunctionPath {
557-
component: ComponentPath::root(),
599+
component: component_path,
558600
udf_path: job.udf_path.clone(),
559601
};
560602
let result = self
@@ -587,7 +629,7 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
587629
let execution_time = start.elapsed();
588630

589631
if outcome.result.is_ok() {
590-
SchedulerModel::new(&mut tx)
632+
SchedulerModel::new(&mut tx, namespace)
591633
.complete(job_id, ScheduledJobState::Success)
592634
.await?;
593635
if let Err(err) = self
@@ -614,7 +656,7 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
614656
// Continue without updating since the job state has changed
615657
return Ok(());
616658
}
617-
SchedulerModel::new(&mut tx)
659+
SchedulerModel::new(&mut tx, namespace)
618660
.complete(
619661
job_id,
620662
ScheduledJobState::Failed(outcome.result.clone().unwrap_err().to_string()),
@@ -648,12 +690,16 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
648690
) -> anyhow::Result<()> {
649691
let identity = tx.identity().clone();
650692
let mut tx = self.database.begin(identity.clone()).await?;
693+
let namespace = tx.table_mapping().tablet_namespace(job_id.tablet_id)?;
694+
let component_path = ComponentsModel::new(&mut tx)
695+
.get_component_path_for_namespace(namespace)
696+
.await?;
651697
match job.state {
652698
ScheduledJobState::Pending => {
653699
// Set state to in progress
654700
let mut updated_job = job.clone();
655701
updated_job.state = ScheduledJobState::InProgress;
656-
SchedulerModel::new(&mut tx)
702+
SchedulerModel::new(&mut tx, namespace)
657703
.replace(job_id, updated_job.clone())
658704
.await?;
659705
self.database
@@ -663,7 +709,7 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
663709
// Execute the action
664710
let context = ExecutionContext::new(request_id, &caller);
665711
let path = CanonicalizedComponentFunctionPath {
666-
component: ComponentPath::root(),
712+
component: component_path,
667713
udf_path: job.udf_path.clone(),
668714
};
669715
let completion = self
@@ -703,7 +749,7 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
703749
// before updating the state. Since we execute actions at most once,
704750
// complete this job and log the error.
705751
let message = "Transient error while executing action".to_string();
706-
SchedulerModel::new(&mut tx)
752+
SchedulerModel::new(&mut tx, namespace)
707753
.complete(job_id, ScheduledJobState::Failed(message.clone()))
708754
.await?;
709755
self.database
@@ -715,7 +761,7 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
715761
// we can log correctly here.
716762
let context = ExecutionContext::new(request_id, &caller);
717763
let path = CanonicalizedComponentFunctionPath {
718-
component: ComponentPath::root(),
764+
component: component_path,
719765
udf_path: job.udf_path.clone(),
720766
};
721767
self.function_log.log_action_system_error(
@@ -776,9 +822,10 @@ impl<RT: Runtime> ScheduledJobContext<RT> {
776822
// Continue without updating since the job state has changed
777823
return Ok(());
778824
}
825+
let namespace = tx.table_mapping().tablet_namespace(job_id.tablet_id)?;
779826

780827
// Remove from the scheduled jobs table
781-
SchedulerModel::new(&mut tx)
828+
SchedulerModel::new(&mut tx, namespace)
782829
.complete(job_id, job_state)
783830
.await?;
784831
self.database
@@ -814,6 +861,7 @@ impl<RT: Runtime> ScheduledJobGarbageCollector<RT> {
814861
async fn run(&self, backoff: &mut Backoff) -> anyhow::Result<()> {
815862
loop {
816863
let mut tx = self.database.begin(Identity::system()).await?;
864+
let namespace = TableNamespace::by_component_TODO();
817865
let now = self.rt.generate_timestamp()?;
818866
let index_query = Query::index_range(IndexRange {
819867
index_name: SCHEDULED_JOBS_INDEX_BY_COMPLETED_TS.clone(),
@@ -824,8 +872,7 @@ impl<RT: Runtime> ScheduledJobGarbageCollector<RT> {
824872
order: Order::Asc,
825873
})
826874
.limit(*SCHEDULED_JOB_GARBAGE_COLLECTION_BATCH_SIZE);
827-
let mut query_stream =
828-
ResolvedQuery::new(&mut tx, TableNamespace::by_component_TODO(), index_query)?;
875+
let mut query_stream = ResolvedQuery::new(&mut tx, namespace, index_query)?;
829876

830877
let mut next_job_wait = None;
831878
let mut jobs_to_delete = vec![];
@@ -855,7 +902,7 @@ impl<RT: Runtime> ScheduledJobGarbageCollector<RT> {
855902
"Garbage collecting {} finished scheduled jobs",
856903
jobs_to_delete.len()
857904
);
858-
let mut model = SchedulerModel::new(&mut tx);
905+
let mut model = SchedulerModel::new(&mut tx, namespace);
859906
for job_id in jobs_to_delete {
860907
model.delete(job_id).await?;
861908
}

0 commit comments

Comments
 (0)