@@ -60,12 +60,16 @@ def _actor_log_context(request) -> tuple[str, str | None]:
6060
6161def _mark_async_services_seen_for_pipelines (pipeline_slugs : tuple [str , ...]) -> None :
6262 """
63- Heartbeat for idle worker polls that send ``pipeline__slug__in=...`` but no
64- ``project_id`` — the real ADC worker shape, where one worker may serve
65- pipelines across many projects and has no single project to nominate.
66-
67- Redis throttle keyed on the sorted slug set so concurrent pollers for the
68- same pipelines share a single dispatch per window.
63+ Redis-throttled wrapper around the ``update_async_services_seen_for_pipelines``
64+ celery task. The wrapper does no DB work itself — it gates dispatch so at
65+ most one heartbeat is enqueued per sorted slug set per
66+ ``HEARTBEAT_THROTTLE_SECONDS`` window (currently 30s), keeping the HTTP
67+ request path cheap under concurrent polling.
68+
69+ Called from the ``?ids_only=1`` branch of ``JobViewSet.list()`` — the real
70+ ADC worker shape, which sends ``pipeline__slug__in=<slugs>`` and no
71+ ``project_id`` (one worker may serve pipelines across many projects and
72+ has no single project to nominate).
6973 """
7074 if not pipeline_slugs :
7175 return
@@ -80,17 +84,18 @@ def _mark_async_services_seen_for_pipelines(pipeline_slugs: tuple[str, ...]) ->
8084
8185def _mark_async_services_seen_for_project (project_id : int ) -> None :
8286 """
83- Heartbeat for idle worker polls on ``GET /api/v2/jobs/?ids_only=1``.
84-
85- The pipeline-scoped heartbeat in ``_mark_pipeline_pull_services_seen`` only
86- fires when a worker hits /tasks/ or /result/ on an active job; workers idling
87- on the list endpoint between jobs had no heartbeat path at all, so their
88- ``last_seen`` would age out of ``PROCESSING_SERVICE_LAST_SEEN_MAX`` and the
89- UI would flip them to offline despite being actively online.
90-
91- Scope: marks every async service attached to the polling project. The list
92- endpoint has no pipeline context to narrow by. Once application-token auth
93- lands (PR #1117), this should be scoped to the specific calling service.
87+ Redis-throttled wrapper around ``update_async_services_seen_for_project``.
88+ Same shape as ``_mark_async_services_seen_for_pipelines`` above — gates
89+ celery dispatch to at most one per-project enqueue per
90+ ``HEARTBEAT_THROTTLE_SECONDS`` window — but keyed by project id for
91+ callers that send ``?project_id=`` without ``pipeline__slug__in``.
92+
93+ The ADC worker does not currently use this shape, so this is a fallback.
94+ Background on why idle-poll heartbeats exist at all: the other heartbeat
95+ (``_mark_pipeline_pull_services_seen``) only fires from ``/tasks/`` and
96+ ``/result/`` — i.e., from workers with active work — so a worker sitting
97+ on ``GET /jobs/?ids_only=1`` between jobs would otherwise age past
98+ ``PROCESSING_SERVICE_LAST_SEEN_MAX`` and flip to offline in the UI.
9499 """
95100 cache_key = f"heartbeat:list:project:{ project_id } "
96101 if not cache .add (cache_key , 1 , timeout = HEARTBEAT_THROTTLE_SECONDS ):
@@ -302,6 +307,7 @@ def get_queryset(self) -> QuerySet:
302307 status = JobState .failed_states (),
303308 updated_at__lt = cutoff_datetime ,
304309 )
310+ # ⚠️ TEMPORARY HACK — remove by 2026-04-24.
305311 # Worker-polling call path (`ids_only=1`): randomize order so concurrent
306312 # pollers don't all converge on the same head-of-queue job. An
307313 # `updated_at`-based sort has a degenerate case at startup — freshly
@@ -310,18 +316,32 @@ def get_queryset(self) -> QuerySet:
310316 # ordering gives probabilistic disjoint assignment without writing a
311317 # poll-stamp column. Combined with `limit=1` below, each poll is an
312318 # independent "pick any unfinished job" draw.
319+ #
320+ # The whole `ids_only=1` branch (this ordering override, the paginator
321+ # override in `paginator` below, the heartbeat dispatch in `list()`)
322+ # exists because the ADC worker currently repurposes this list endpoint
323+ # as a claim-next-job call. Correct shape is a dedicated `/next` action
324+ # (tracked as #1265). Once `/next` ships
325+ # and ADC is migrated, delete this `order_by("?")` override along with
326+ # the paginator override and the list() heartbeat branch.
313327 if self .action == "list" and url_boolean_param (self .request , "ids_only" , default = False ):
314328 jobs = jobs .order_by ("?" )
315329 return jobs
316330
317331 @property
318332 def paginator (self ):
333+ # ⚠️ TEMPORARY HACK — remove by 2026-04-24.
319334 # Treat `?ids_only=1` as a pop()-style handoff ("what job is next?")
320335 # rather than a list() dump: default to one job per response unless the
321336 # caller explicitly asks for a batch via ?limit=N or ?page_size=N.
322337 # Concurrent pollers drain a cached list serially and starve later jobs;
323- # forcing a re-poll per job lets the `updated_at` fairness sort rotate
338+ # forcing a re-poll per job lets the random `order_by("?")` ordering rotate
324339 # work across jobs every iteration. No ADC-side change required.
340+ #
341+ # This override exists only because `list(ids_only=1)` is being used
342+ # as a claim-next-job call. Replace with a dedicated `/next` action
343+ # (tracked as #1265); once ADC is migrated,
344+ # drop this override so the list endpoint goes back to normal pagination.
325345 if not hasattr (self , "_paginator" ):
326346 if (
327347 self .action == "list"
@@ -344,11 +364,18 @@ def paginator(self):
344364 ]
345365 )
346366 def list (self , request , * args , ** kwargs ):
367+ # ⚠️ TEMPORARY HACK — remove by 2026-04-24.
347368 # Worker-polling call path: record heartbeat for async processing services.
348369 # The real ADC worker request carries ``pipeline__slug__in=...`` and no
349370 # project_id, so prefer the pipeline-slug scope when those slugs are
350371 # present; fall back to project scope for callers that pass ?project_id=.
351372 # Throttled via Redis so concurrent pollers don't churn the DB/broker.
373+ #
374+ # This heartbeat branch lives on `list()` only because `list(ids_only=1)`
375+ # is doubling as the worker's claim-next-job endpoint. Once a dedicated
376+ # `/next` action ships (tracked as #1265)
377+ # and ADC is migrated to it, move the heartbeat to that action and
378+ # delete this branch — `list()` should go back to being a plain list.
352379 if url_boolean_param (request , "ids_only" , default = False ):
353380 pipeline_slugs_raw = request .query_params .get ("pipeline__slug__in" )
354381 if pipeline_slugs_raw :
0 commit comments