From d241c9e79c2341d7811f7ff1f399595f4a4fc5b7 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Mon, 2 Mar 2026 16:30:23 -0800 Subject: [PATCH 01/16] [scheduler] Implement an orchestration logic to make scheduler distributable --- .gitignore | 1 + ...V38__Add_scheduler_orchestrator_tables.sql | 25 + rust/crates/scheduler/Cargo.toml | 1 + rust/crates/scheduler/src/cluster.rs | 117 +++- rust/crates/scheduler/src/config/mod.rs | 46 ++ rust/crates/scheduler/src/dao/cluster_dao.rs | 4 +- rust/crates/scheduler/src/main.rs | 13 + rust/crates/scheduler/src/metrics/mod.rs | 72 ++- rust/crates/scheduler/src/orchestrator/dao.rs | 254 ++++++++ .../scheduler/src/orchestrator/distributor.rs | 582 ++++++++++++++++++ .../scheduler/src/orchestrator/instance.rs | 153 +++++ .../scheduler/src/orchestrator/leader.rs | 158 +++++ rust/crates/scheduler/src/orchestrator/mod.rs | 108 ++++ .../crates/scheduler/src/orchestrator/sync.rs | 106 ++++ .../scheduler/src/pipeline/entrypoint.rs | 2 +- rust/crates/scheduler/tests/smoke_tests.rs | 90 +-- rust/crates/scheduler/tests/stress_tests.rs | 2 +- rust/crates/scheduler/tests/util.rs | 1 + 18 files changed, 1659 insertions(+), 76 deletions(-) create mode 100644 cuebot/src/main/resources/conf/ddl/postgres/migrations/V38__Add_scheduler_orchestrator_tables.sql create mode 100644 rust/crates/scheduler/src/orchestrator/dao.rs create mode 100644 rust/crates/scheduler/src/orchestrator/distributor.rs create mode 100644 rust/crates/scheduler/src/orchestrator/instance.rs create mode 100644 rust/crates/scheduler/src/orchestrator/leader.rs create mode 100644 rust/crates/scheduler/src/orchestrator/mod.rs create mode 100644 rust/crates/scheduler/src/orchestrator/sync.rs diff --git a/.gitignore b/.gitignore index 72fd11b6a..607860309 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,4 @@ CLAUDE.md docs/nav_order_index.txt rust/.cargo/config.toml sandbox/pgadmin-data/* +plans/* diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V38__Add_scheduler_orchestrator_tables.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V38__Add_scheduler_orchestrator_tables.sql new file mode 100644 index 000000000..097b59077 --- /dev/null +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V38__Add_scheduler_orchestrator_tables.sql @@ -0,0 +1,25 @@ +-- Scheduler orchestrator tables for distributed cluster assignment (Mode 4) + +CREATE TABLE scheduler_instance ( + pk_instance UUID PRIMARY KEY, + str_name VARCHAR(256) NOT NULL, + str_facility VARCHAR(256), + ts_heartbeat TIMESTAMPTZ NOT NULL DEFAULT NOW(), + ts_registered TIMESTAMPTZ NOT NULL DEFAULT NOW(), + int_capacity INTEGER NOT NULL DEFAULT 100, + float_jobs_queried DOUBLE PRECISION NOT NULL DEFAULT 0, + b_draining BOOLEAN NOT NULL DEFAULT FALSE +); + +CREATE INDEX idx_scheduler_instance_heartbeat ON scheduler_instance(ts_heartbeat); + +CREATE TABLE scheduler_cluster_assignment ( + pk_assignment UUID PRIMARY KEY DEFAULT gen_random_uuid(), + pk_instance UUID NOT NULL REFERENCES scheduler_instance(pk_instance) ON DELETE CASCADE, + str_cluster TEXT NOT NULL, + int_version INTEGER NOT NULL DEFAULT 0, + ts_assigned TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE(str_cluster) +); + +CREATE INDEX idx_sca_instance ON scheduler_cluster_assignment(pk_instance); diff --git a/rust/crates/scheduler/Cargo.toml b/rust/crates/scheduler/Cargo.toml index ab2f81726..9d9c17d63 100644 --- a/rust/crates/scheduler/Cargo.toml +++ b/rust/crates/scheduler/Cargo.toml @@ -56,6 +56,7 @@ prometheus = "0.13" axum = "0.7" tower-http = { version = 
"0.5", features = ["trace"] } urlencoding = "2.1" +gethostname = "0.4" [features] default = [] diff --git a/rust/crates/scheduler/src/cluster.rs b/rust/crates/scheduler/src/cluster.rs index 71a93d7c5..8ffb347a2 100644 --- a/rust/crates/scheduler/src/cluster.rs +++ b/rust/crates/scheduler/src/cluster.rs @@ -74,10 +74,10 @@ impl Cluster { #[derive(Debug)] pub struct ClusterFeed { - pub clusters: Arc>>, - current_index: Arc, - stop_flag: Arc, - sleep_map: Arc>>, + pub clusters: RwLock>, + current_index: AtomicUsize, + stop_flag: AtomicBool, + sleep_map: Mutex>, } /// Control messages for the cluster feed stream. @@ -142,7 +142,7 @@ impl ClusterFeedBuilder { /// If explicit clusters were provided via [`with_clusters`](Self::with_clusters), they are /// used directly (filtered by ignore tags). Otherwise all clusters are loaded from the /// database, filtered to the configured facility and ignore tags. - pub async fn build(self) -> Result { + pub async fn build(self) -> Result> { let clusters = if self.clusters.is_empty() && self.entire_shows.is_empty() { let all = ClusterFeed::load_clusters(self.facility_id, &self.ignore_tags, None).await?; ClusterFeed::filter_clusters(all, &self.ignore_tags) @@ -159,16 +159,60 @@ impl ClusterFeedBuilder { } ClusterFeed::filter_clusters(clusters.into_iter().collect(), &self.ignore_tags) }; - Ok(ClusterFeed { - clusters: Arc::new(RwLock::new(clusters)), - current_index: Arc::new(AtomicUsize::new(0)), - stop_flag: Arc::new(AtomicBool::new(false)), - sleep_map: Arc::new(Mutex::new(HashMap::new())), - }) + Ok(Arc::new(ClusterFeed { + clusters: RwLock::new(clusters), + current_index: AtomicUsize::new(0), + stop_flag: AtomicBool::new(false), + sleep_map: Mutex::new(HashMap::new()), + })) } } impl ClusterFeed { + /// Creates an empty ClusterFeed with no clusters. + /// + /// Used in orchestrated mode where clusters are populated dynamically + /// by the sync loop polling the orchestrator's assignment table. + pub fn empty() -> Arc { + Arc::new(ClusterFeed { + clusters: RwLock::new(Vec::new()), + current_index: AtomicUsize::new(0), + stop_flag: AtomicBool::new(false), + sleep_map: Mutex::new(HashMap::new()), + }) + } + + /// Creates a ClusterFeed from a predefined list of clusters. + /// + /// Applies ignore-tag filtering, then wraps the result in an `Arc`. + /// Useful in tests and scenarios where clusters are known upfront. + pub fn from_clusters(clusters: Vec, ignore_tags: &[String]) -> Arc { + let clusters = Self::filter_clusters(clusters, ignore_tags); + Arc::new(ClusterFeed { + clusters: RwLock::new(clusters), + current_index: AtomicUsize::new(0), + stop_flag: AtomicBool::new(false), + sleep_map: Mutex::new(HashMap::new()), + }) + } + + /// Replaces the cluster list with a new set of clusters. + /// + /// Called by the orchestrator sync loop when assignments change. + /// Resets the round-robin index and cleans up stale sleep entries. + pub fn update_clusters(&self, new_clusters: Vec) { + let new_set: HashSet<&Cluster> = new_clusters.iter().collect(); + { + let mut sleep_map = self.sleep_map.lock().unwrap_or_else(|p| p.into_inner()); + sleep_map.retain(|c, _| new_set.contains(c)); + } + { + let mut clusters = self.clusters.write().unwrap_or_else(|p| p.into_inner()); + *clusters = new_clusters; + } + self.current_index.store(0, Ordering::Relaxed); + } + /// Returns a builder for a feed scoped to the given facility. 
pub fn facility(facility_id: Uuid) -> ClusterFeedBuilder { ClusterFeedBuilder { @@ -357,45 +401,58 @@ impl ClusterFeed { /// - Applies backoff delays between rounds (varies based on sleeping cluster count) /// - Stops when receiving a Stop message or when configured empty cycles limit is reached /// - Automatically cleans up expired sleep entries - pub async fn stream(self, sender: mpsc::Sender) -> mpsc::Sender { + pub async fn stream( + self: Arc, + sender: mpsc::Sender, + ) -> mpsc::Sender { // Use a small channel to ensure the producer waits for items to be consumed before // generating more let (cancel_sender, mut feed_receiver) = mpsc::channel(8); - let stop_flag = self.stop_flag.clone(); - let sleep_map = self.sleep_map.clone(); + let self_arc = self.clone(); // Stream clusters on the caller channel tokio::spawn(async move { let mut all_sleeping_rounds = 0; - let feed = self.clusters.clone(); - let current_index_atomic = self.current_index.clone(); + let feed = &self_arc.clusters; + let current_index_atomic = &self_arc.current_index; loop { // Check stop flag - if stop_flag.load(Ordering::Relaxed) { + if self_arc.stop_flag.load(Ordering::Relaxed) { warn!("Cluster received a stop message. Stopping feed."); break; } - let (item, cluster_size, completed_round) = { + let snapshot = { let clusters = feed.read().unwrap_or_else(|poisoned| poisoned.into_inner()); if clusters.is_empty() { - break; + None + } else { + let current_index = current_index_atomic.load(Ordering::Relaxed); + let item = clusters[current_index].clone(); + let next_index = (current_index + 1) % clusters.len(); + let completed_round = next_index == 0; // Detect wrap-around + current_index_atomic.store(next_index, Ordering::Relaxed); + Some((item, clusters.len(), completed_round)) } + }; - let current_index = current_index_atomic.load(Ordering::Relaxed); - let item = clusters[current_index].clone(); - let next_index = (current_index + 1) % clusters.len(); - let completed_round = next_index == 0; // Detect wrap-around - current_index_atomic.store(next_index, Ordering::Relaxed); - - (item, clusters.len(), completed_round) + // Guard is dropped here; safe to await + let (item, cluster_size, completed_round) = match snapshot { + Some(s) => s, + None => { + // No clusters yet (e.g. orchestrated mode waiting for assignments). + // Sleep briefly and retry instead of exiting the loop. 
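+ // The 1s retry is deliberately shorter than the default
+ // orchestrator poll_interval (5s), so the first assignment
+ // batch written by the sync loop is picked up promptly.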
+ tokio::time::sleep(Duration::from_secs(1)).await; + continue; + } }; // Skip cluster if it is marked as sleeping let is_sleeping = { - let mut sleep_map_lock = sleep_map.lock().unwrap_or_else(|p| p.into_inner()); + let mut sleep_map_lock = + self_arc.sleep_map.lock().unwrap_or_else(|p| p.into_inner()); if let Some(wake_up_time) = sleep_map_lock.get(&item) { if *wake_up_time > SystemTime::now() { // Still sleeping, skip it @@ -421,7 +478,8 @@ impl ClusterFeed { // Check if all/most clusters are sleeping let sleeping_count = { - let sleep_map_lock = sleep_map.lock().unwrap_or_else(|p| p.into_inner()); + let sleep_map_lock = + self_arc.sleep_map.lock().unwrap_or_else(|p| p.into_inner()); sleep_map_lock.len() }; if sleeping_count >= cluster_size { @@ -449,7 +507,6 @@ impl ClusterFeed { }); // Process messages on the receiving end - let sleep_map = self.sleep_map.clone(); tokio::spawn(async move { while let Some(message) = feed_receiver.recv().await { match message { @@ -458,7 +515,7 @@ impl ClusterFeed { debug!("{:?} put to sleep for {}s", cluster, duration.as_secs()); { let mut sleep_map_lock = - sleep_map.lock().unwrap_or_else(|p| p.into_inner()); + self.sleep_map.lock().unwrap_or_else(|p| p.into_inner()); sleep_map_lock.insert(cluster, wake_up_time); } } else { diff --git a/rust/crates/scheduler/src/config/mod.rs b/rust/crates/scheduler/src/config/mod.rs index 908d61453..0010de6be 100644 --- a/rust/crates/scheduler/src/config/mod.rs +++ b/rust/crates/scheduler/src/config/mod.rs @@ -42,6 +42,7 @@ pub struct Config { pub rqd: RqdConfig, pub host_cache: HostCacheConfig, pub scheduler: SchedulerConfig, + pub orchestrator: OrchestratorConfig, } #[derive(Debug, Deserialize, Clone)] @@ -258,6 +259,51 @@ pub struct ManualTags { pub tags: Vec, } +#[derive(Debug, Deserialize, Clone)] +#[serde(default)] +pub struct OrchestratorConfig { + /// How often this instance updates its heartbeat (default: 5s) + #[serde(with = "humantime_serde")] + pub heartbeat_interval: Duration, + + /// Instance is considered dead after this duration without heartbeat (default: 30s) + #[serde(with = "humantime_serde")] + pub failure_threshold: Duration, + + /// How often the leader recalculates cluster distribution (default: 10s) + #[serde(with = "humantime_serde")] + pub distribution_interval: Duration, + + /// How often workers poll for assignment changes (default: 5s) + #[serde(with = "humantime_serde")] + pub poll_interval: Duration, + + /// How often non-leaders attempt to acquire the leader lock (default: 10s) + #[serde(with = "humantime_serde")] + pub election_interval: Duration, + + /// Relative capacity weight of this instance (default: 100) + pub capacity: u32, + + /// Graceful shutdown timeout before force-killing in-flight work (default: 30s) + #[serde(with = "humantime_serde")] + pub shutdown_timeout: Duration, +} + +impl Default for OrchestratorConfig { + fn default() -> Self { + OrchestratorConfig { + heartbeat_interval: Duration::from_secs(5), + failure_threshold: Duration::from_secs(30), + distribution_interval: Duration::from_secs(10), + poll_interval: Duration::from_secs(5), + election_interval: Duration::from_secs(10), + capacity: 100, + shutdown_timeout: Duration::from_secs(30), + } + } +} + //===Config Loader=== impl Config { diff --git a/rust/crates/scheduler/src/dao/cluster_dao.rs b/rust/crates/scheduler/src/dao/cluster_dao.rs index 3aeb8fa41..a76479b3c 100644 --- a/rust/crates/scheduler/src/dao/cluster_dao.rs +++ b/rust/crates/scheduler/src/dao/cluster_dao.rs @@ -209,7 +209,7 @@ impl ClusterDao { 
&self, facility_id: Option, shows_filter: Option>, - ) -> std::pin::Pin> + '_>> { + ) -> std::pin::Pin> + Send + '_>> { match (facility_id, shows_filter) { (Some(fid), Some(show_names)) => Box::pin( sqlx::query_as::<_, ClusterModel>( @@ -253,7 +253,7 @@ impl ClusterDao { &self, facility_id: Option, shows_filter: Option>, - ) -> std::pin::Pin> + '_>> { + ) -> std::pin::Pin> + Send + '_>> { match (facility_id, shows_filter) { (Some(fid), Some(show_names)) => Box::pin( sqlx::query_as::<_, ClusterModel>( diff --git a/rust/crates/scheduler/src/main.rs b/rust/crates/scheduler/src/main.rs index a8ec85499..19c3eaf65 100644 --- a/rust/crates/scheduler/src/main.rs +++ b/rust/crates/scheduler/src/main.rs @@ -34,6 +34,7 @@ mod dao; mod host_cache; mod metrics; mod models; +mod orchestrator; mod pgpool; mod pipeline; @@ -70,6 +71,13 @@ pub struct JobQueueCli { long_help = "A list of tags to ignore when loading clusters." )] ignore_tags: Vec, + + #[structopt( + long, + short = "o", + long_help = "Run in orchestrated mode. Clusters are assigned by the orchestrator leader." + )] + orchestrated: bool, } #[derive(Debug, Clone)] @@ -170,6 +178,11 @@ impl JobQueueCli { async fn run(&self) -> miette::Result<()> { let (facility, entire_shows, alloc_tags, manual_tags, ignore_tags) = self.resolve_config(); + // Orchestrated mode — delegate to orchestrator module + if self.orchestrated { + return orchestrator::run(facility, ignore_tags).await; + } + // Lookup facility_id from facility name let facility_id = match &facility { Some(facility) => Some( diff --git a/rust/crates/scheduler/src/metrics/mod.rs b/rust/crates/scheduler/src/metrics/mod.rs index f96eabbb1..43daa0925 100644 --- a/rust/crates/scheduler/src/metrics/mod.rs +++ b/rust/crates/scheduler/src/metrics/mod.rs @@ -13,8 +13,8 @@ use axum::{response::IntoResponse, routing::get, Router}; use lazy_static::lazy_static; use prometheus::{ - register_counter, register_counter_vec, register_histogram, Counter, CounterVec, Encoder, - Histogram, TextEncoder, + register_counter, register_counter_vec, register_histogram, register_int_gauge, Counter, + CounterVec, Encoder, Histogram, IntGauge, TextEncoder, }; use std::time::Duration; use tracing::{error, info}; @@ -70,6 +70,31 @@ lazy_static! 
{ vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0] ) .expect("Failed to register job_query_duration_seconds histogram"); + + // Orchestrator metrics + pub static ref ORCHESTRATOR_ASSIGNED_CLUSTERS: IntGauge = register_int_gauge!( + "scheduler_orchestrator_assigned_clusters", + "Number of clusters assigned to this instance" + ) + .expect("Failed to register orchestrator_assigned_clusters gauge"); + + pub static ref ORCHESTRATOR_IS_LEADER: IntGauge = register_int_gauge!( + "scheduler_orchestrator_is_leader", + "Whether this instance is the orchestrator leader (1 = leader, 0 = worker)" + ) + .expect("Failed to register orchestrator_is_leader gauge"); + + pub static ref ORCHESTRATOR_INSTANCES_ALIVE: IntGauge = register_int_gauge!( + "scheduler_orchestrator_instances_alive", + "Total number of live scheduler instances (leader only)" + ) + .expect("Failed to register orchestrator_instances_alive gauge"); + + pub static ref ORCHESTRATOR_REBALANCE_TOTAL: Counter = register_counter!( + "scheduler_orchestrator_rebalance_total", + "Total number of cluster distribution rebalance events" + ) + .expect("Failed to register orchestrator_rebalance_total counter"); } /// Handler for the /metrics endpoint @@ -107,8 +132,23 @@ async fn metrics_handler() -> impl IntoResponse { /// # Returns /// /// This function runs indefinitely and only returns if the server fails to start +/// Handler for the /health endpoint +async fn health_handler() -> impl IntoResponse { + let assigned = ORCHESTRATOR_ASSIGNED_CLUSTERS.get(); + if assigned > 0 { + (axum::http::StatusCode::OK, "ok") + } else { + ( + axum::http::StatusCode::SERVICE_UNAVAILABLE, + "no clusters assigned", + ) + } +} + pub async fn start_server(addr: &str) -> miette::Result<()> { - let app = Router::new().route("/metrics", get(metrics_handler)); + let app = Router::new() + .route("/metrics", get(metrics_handler)) + .route("/health", get(health_handler)); let listener = tokio::net::TcpListener::bind(addr) .await @@ -166,3 +206,29 @@ pub fn observe_time_to_book(duration: Duration) { pub fn observe_job_query_duration(duration: Duration) { JOB_QUERY_DURATION_SECONDS.observe(duration.as_secs_f64()); } + +// --- Orchestrator metrics --- + +/// Set the number of clusters assigned to this instance +#[inline] +pub fn set_orchestrator_assigned_clusters(count: usize) { + ORCHESTRATOR_ASSIGNED_CLUSTERS.set(count as i64); +} + +/// Set whether this instance is the orchestrator leader +#[inline] +pub fn set_orchestrator_is_leader(is_leader: bool) { + ORCHESTRATOR_IS_LEADER.set(if is_leader { 1 } else { 0 }); +} + +/// Set the total number of live instances (leader only) +#[inline] +pub fn set_orchestrator_instances_alive(count: usize) { + ORCHESTRATOR_INSTANCES_ALIVE.set(count as i64); +} + +/// Increment the rebalance counter +#[inline] +pub fn increment_orchestrator_rebalance() { + ORCHESTRATOR_REBALANCE_TOTAL.inc(); +} diff --git a/rust/crates/scheduler/src/orchestrator/dao.rs b/rust/crates/scheduler/src/orchestrator/dao.rs new file mode 100644 index 000000000..81760fa5b --- /dev/null +++ b/rust/crates/scheduler/src/orchestrator/dao.rs @@ -0,0 +1,254 @@ +// Copyright Contributors to the OpenCue Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +use std::sync::Arc; +use std::time::Duration; + +use miette::{IntoDiagnostic, Result}; +use sqlx::{Pool, Postgres}; +use uuid::Uuid; + +use crate::cluster::Cluster; +use crate::pgpool::connection_pool; + +/// Data Access Object for orchestrator tables (scheduler_instance and scheduler_cluster_assignment). +pub struct OrchestratorDao { + connection_pool: Arc>, +} + +#[derive(sqlx::FromRow, Debug, Clone)] +#[allow(dead_code)] +pub struct InstanceRow { + pub pk_instance: String, + pub str_name: String, + pub str_facility: Option, + pub int_capacity: i32, + pub float_jobs_queried: f64, + pub b_draining: bool, +} + +#[derive(sqlx::FromRow, Debug, Clone)] +#[allow(dead_code)] +pub struct ClusterAssignmentRow { + pub pk_assignment: String, + pub pk_instance: String, + pub str_cluster: String, + pub int_version: i32, +} + +// --- Instance queries --- + +static INSERT_INSTANCE: &str = r#" +INSERT INTO scheduler_instance (pk_instance, str_name, str_facility, int_capacity, ts_heartbeat, ts_registered) +VALUES ($1, $2, $3, $4, NOW(), NOW()) +"#; + +static UPDATE_HEARTBEAT: &str = r#" +UPDATE scheduler_instance +SET ts_heartbeat = NOW(), float_jobs_queried = $2 +WHERE pk_instance = $1 +"#; + +static SET_DRAINING: &str = r#" +UPDATE scheduler_instance +SET b_draining = TRUE +WHERE pk_instance = $1 +"#; + +static DELETE_INSTANCE: &str = r#" +DELETE FROM scheduler_instance +WHERE pk_instance = $1 +"#; + +static DELETE_DEAD_INSTANCES: &str = r#" +DELETE FROM scheduler_instance +WHERE ts_heartbeat < NOW() - $1::interval +RETURNING pk_instance +"#; + +static QUERY_LIVE_INSTANCES: &str = r#" +SELECT pk_instance, str_name, str_facility, int_capacity, float_jobs_queried, b_draining +FROM scheduler_instance +WHERE ts_heartbeat >= NOW() - $1::interval + AND b_draining = FALSE +"#; + +// --- Cluster assignment queries --- + +static QUERY_ASSIGNMENTS_FOR_INSTANCE: &str = r#" +SELECT pk_assignment, pk_instance, str_cluster, int_version +FROM scheduler_cluster_assignment +WHERE pk_instance = $1 +"#; + +static QUERY_ALL_ASSIGNMENTS: &str = r#" +SELECT pk_assignment, pk_instance, str_cluster, int_version +FROM scheduler_cluster_assignment +"#; + +static UPSERT_ASSIGNMENT: &str = r#" +INSERT INTO scheduler_cluster_assignment (pk_instance, str_cluster, int_version, ts_assigned) +VALUES ($1, $2, 0, NOW()) +ON CONFLICT (str_cluster) +DO UPDATE SET pk_instance = $1, int_version = scheduler_cluster_assignment.int_version + 1, ts_assigned = NOW() +"#; + +static DELETE_ASSIGNMENT_BY_CLUSTER: &str = r#" +DELETE FROM scheduler_cluster_assignment +WHERE str_cluster = $1 +"#; + +// --- Advisory lock --- + +static TRY_ADVISORY_LOCK: &str = r#" +SELECT pg_try_advisory_lock($1) +"#; + +impl OrchestratorDao { + pub async fn new() -> Result { + let pool = connection_pool().await.into_diagnostic()?; + Ok(OrchestratorDao { + connection_pool: pool, + }) + } + + // --- Instance operations --- + + pub async fn register_instance( + &self, + instance_id: Uuid, + name: &str, + facility: Option<&str>, + capacity: i32, + ) -> Result<(), sqlx::Error> { + sqlx::query(INSERT_INSTANCE) + .bind(instance_id.to_string()) + .bind(name) + 
.bind(facility) + .bind(capacity) + .execute(&*self.connection_pool) + .await?; + Ok(()) + } + + pub async fn update_heartbeat( + &self, + instance_id: Uuid, + jobs_queried: f64, + ) -> Result<(), sqlx::Error> { + sqlx::query(UPDATE_HEARTBEAT) + .bind(instance_id.to_string()) + .bind(jobs_queried) + .execute(&*self.connection_pool) + .await?; + Ok(()) + } + + pub async fn set_draining(&self, instance_id: Uuid) -> Result<(), sqlx::Error> { + sqlx::query(SET_DRAINING) + .bind(instance_id.to_string()) + .execute(&*self.connection_pool) + .await?; + Ok(()) + } + + pub async fn delete_instance(&self, instance_id: Uuid) -> Result<(), sqlx::Error> { + sqlx::query(DELETE_INSTANCE) + .bind(instance_id.to_string()) + .execute(&*self.connection_pool) + .await?; + Ok(()) + } + + pub async fn delete_dead_instances( + &self, + failure_threshold: Duration, + ) -> Result, sqlx::Error> { + let interval = format!("{} seconds", failure_threshold.as_secs()); + let rows: Vec<(String,)> = sqlx::query_as(DELETE_DEAD_INSTANCES) + .bind(interval) + .fetch_all(&*self.connection_pool) + .await?; + Ok(rows + .into_iter() + .map(|(id,)| crate::dao::helpers::parse_uuid(&id)) + .collect()) + } + + pub async fn get_live_instances( + &self, + failure_threshold: Duration, + ) -> Result, sqlx::Error> { + let interval = format!("{} seconds", failure_threshold.as_secs()); + sqlx::query_as::<_, InstanceRow>(QUERY_LIVE_INSTANCES) + .bind(interval) + .fetch_all(&*self.connection_pool) + .await + } + + // --- Cluster assignment operations --- + + pub async fn get_assignments_for_instance( + &self, + instance_id: Uuid, + ) -> Result, sqlx::Error> { + sqlx::query_as::<_, ClusterAssignmentRow>(QUERY_ASSIGNMENTS_FOR_INSTANCE) + .bind(instance_id.to_string()) + .fetch_all(&*self.connection_pool) + .await + } + + pub async fn get_all_assignments(&self) -> Result, sqlx::Error> { + sqlx::query_as::<_, ClusterAssignmentRow>(QUERY_ALL_ASSIGNMENTS) + .fetch_all(&*self.connection_pool) + .await + } + + /// Upserts a cluster assignment. If the cluster is already assigned, updates the instance + /// and bumps the version. + pub async fn upsert_assignment( + &self, + instance_id: Uuid, + cluster: &Cluster, + ) -> Result<(), sqlx::Error> { + let cluster_json = + serde_json::to_string(cluster).expect("Failed to serialize Cluster to JSON"); + sqlx::query(UPSERT_ASSIGNMENT) + .bind(instance_id.to_string()) + .bind(cluster_json) + .execute(&*self.connection_pool) + .await?; + Ok(()) + } + + pub async fn delete_assignment_by_cluster(&self, cluster: &Cluster) -> Result<(), sqlx::Error> { + let cluster_json = + serde_json::to_string(cluster).expect("Failed to serialize Cluster to JSON"); + sqlx::query(DELETE_ASSIGNMENT_BY_CLUSTER) + .bind(cluster_json) + .execute(&*self.connection_pool) + .await?; + Ok(()) + } + + // --- Leader election --- + + /// Attempts to acquire the advisory lock. Returns true if acquired. + /// Uses a dedicated connection (not from the pool) to hold the session-level lock. 
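+ /// Note: as written below, the statement runs on the shared pool, so the
+ /// session-level lock follows whichever pooled session executed it;
+ /// `pg_try_advisory_lock` itself is non-blocking and returns true or false
+ /// immediately.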
+ pub async fn try_acquire_leader_lock(&self, lock_id: i64) -> Result { + let row: (bool,) = sqlx::query_as(TRY_ADVISORY_LOCK) + .bind(lock_id) + .fetch_one(&*self.connection_pool) + .await?; + Ok(row.0) + } +} diff --git a/rust/crates/scheduler/src/orchestrator/distributor.rs b/rust/crates/scheduler/src/orchestrator/distributor.rs new file mode 100644 index 000000000..cb9404ca8 --- /dev/null +++ b/rust/crates/scheduler/src/orchestrator/distributor.rs @@ -0,0 +1,582 @@ +// Copyright Contributors to the OpenCue Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +use miette::{IntoDiagnostic, Result}; +use tracing::{debug, info, warn}; +use uuid::Uuid; + +use crate::cluster::{Cluster, ClusterFeed}; +use crate::dao::helpers::parse_uuid; + +use super::dao::{InstanceRow, OrchestratorDao}; + +/// Snapshot of an instance's jobs_queried value at a point in time, +/// used to compute the rate between distribution cycles. +struct RateSnapshot { + jobs_queried: f64, + timestamp: Instant, +} + +/// The distributor runs on the leader instance. It loads all clusters from the database, +/// reads live instances, computes load rates, and assigns clusters to instances. +pub struct Distributor { + /// Previous snapshots of each instance's jobs_queried counter, keyed by instance ID. + previous_snapshots: HashMap, +} + +impl Distributor { + /// Creates a new distributor with empty rate snapshots. + /// + /// The first distribution cycle will use count-based balancing since no + /// previous rate data is available. + pub fn new() -> Self { + Distributor { + previous_snapshots: HashMap::new(), + } + } + + /// Runs one distribution cycle: loads clusters, reads instances, computes assignments. + /// + /// Cleans up dead instances, loads all clusters from the database, reads live + /// instances, computes job query rates, determines optimal cluster-to-instance + /// assignments, and applies changes to the database. + /// + /// # Arguments + /// + /// * `dao` - Database access for reading instances and writing assignments + /// * `ignore_tags` - Allocation tags to exclude from cluster loading + /// * `failure_threshold` - Duration after which an instance without a heartbeat is considered dead + /// + /// # Returns + /// + /// * `Ok(())` - Distribution cycle completed successfully + /// * `Err(miette::Error)` - Database error during distribution + pub async fn distribute( + &mut self, + dao: &OrchestratorDao, + ignore_tags: &[String], + failure_threshold: Duration, + ) -> Result<()> { + // 1. Clean up dead instances (cascade deletes their assignments) + let dead = dao + .delete_dead_instances(failure_threshold) + .await + .into_diagnostic()?; + if !dead.is_empty() { + info!("Removed {} dead instance(s): {:?}", dead.len(), dead); + // Clean snapshots for dead instances + for id in &dead { + self.previous_snapshots.remove(id); + } + } + + // 2. 
Load all clusters from the database + let all_clusters = ClusterFeed::load_clusters(None, ignore_tags, None).await?; + let all_clusters = ClusterFeed::filter_clusters(all_clusters, ignore_tags); + + if all_clusters.is_empty() { + debug!("No clusters found in database"); + return Ok(()); + } + + // 3. Read live instances + let instances = dao + .get_live_instances(failure_threshold) + .await + .into_diagnostic()?; + + if instances.is_empty() { + warn!("No live instances available for cluster distribution"); + return Ok(()); + } + + // 4. Read current assignments + let current_assignments = dao.get_all_assignments().await.into_diagnostic()?; + + // Build a map: cluster_json -> currently assigned instance_id + let mut current_map: HashMap = HashMap::new(); + for assignment in ¤t_assignments { + current_map.insert( + assignment.str_cluster.clone(), + parse_uuid(&assignment.pk_instance), + ); + } + + // Build set of live instance IDs for quick lookup + let live_ids: std::collections::HashSet = instances + .iter() + .map(|i| parse_uuid(&i.pk_instance)) + .collect(); + + // 5. Compute job query rates per instance + let now = Instant::now(); + let rates = self.compute_rates(&instances, now); + + // 6. Compute new assignments + let new_assignments = + Self::compute_assignments(&all_clusters, &instances, ¤t_map, &live_ids, &rates); + + // 7. Apply assignment changes to the database + self.apply_assignments(dao, &all_clusters, &new_assignments, ¤t_map) + .await?; + + // 8. Update snapshots for next cycle + for instance in &instances { + self.previous_snapshots.insert( + parse_uuid(&instance.pk_instance), + RateSnapshot { + jobs_queried: instance.float_jobs_queried, + timestamp: now, + }, + ); + } + + // 9. Update metrics + crate::metrics::set_orchestrator_instances_alive(instances.len()); + crate::metrics::increment_orchestrator_rebalance(); + + debug!( + "Distribution complete: {} clusters across {} instances", + all_clusters.len(), + instances.len() + ); + + Ok(()) + } + + /// Computes the job query rate for each instance based on the delta from previous snapshots. + /// Returns a map of instance_id -> rate (jobs/second). + /// If no previous snapshot exists (bootstrap), rate is 0.0. + fn compute_rates(&self, instances: &[InstanceRow], now: Instant) -> HashMap { + let mut rates = HashMap::new(); + + for instance in instances { + let id = parse_uuid(&instance.pk_instance); + let rate = if let Some(prev) = self.previous_snapshots.get(&id) { + let delta_jobs = instance.float_jobs_queried - prev.jobs_queried; + let delta_secs = now.duration_since(prev.timestamp).as_secs_f64(); + if delta_secs > 0.0 && delta_jobs >= 0.0 { + delta_jobs / delta_secs + } else { + 0.0 + } + } else { + // No previous snapshot — bootstrap with rate 0 + 0.0 + }; + + rates.insert(id, rate); + } + + rates + } + + /// Pure function that computes the optimal cluster-to-instance assignment. + /// + /// Strategy: + /// 1. Preserve stable assignments (cluster stays on same live instance if eligible). + /// 2. Assign unassigned clusters to the instance with the lowest load ratio. 
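+ ///
+ /// "Lowest load ratio" means `rate / capacity` once rate data exists, and
+ /// `assigned_count / capacity` on the bootstrap pass where all rates are
+ /// still zero, so an instance with capacity 200 absorbs roughly twice as
+ /// many clusters as one with capacity 100.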
+ fn compute_assignments( + all_clusters: &[Cluster], + instances: &[InstanceRow], + current_map: &HashMap, + live_ids: &std::collections::HashSet, + rates: &HashMap, + ) -> HashMap { + let mut assignments: HashMap = HashMap::new(); + + // Parse instance IDs once + let instance_ids: Vec = instances + .iter() + .map(|i| parse_uuid(&i.pk_instance)) + .collect(); + + // Track load per instance (weighted by rate / capacity) + let mut instance_load: HashMap = instance_ids + .iter() + .map(|id| (*id, *rates.get(id).unwrap_or(&0.0))) + .collect(); + + // Track assignment count per instance for bootstrap (when all rates are 0) + let mut instance_count: HashMap = + instance_ids.iter().map(|id| (*id, 0)).collect(); + + let all_rates_zero = rates.values().all(|r| *r == 0.0); + + // Build instance capacity map + let capacity_map: HashMap = instances + .iter() + .zip(instance_ids.iter()) + .map(|(i, id)| (*id, i.int_capacity as f64)) + .collect(); + + // Build facility map for affinity filtering + let instance_facilities: HashMap> = instances + .iter() + .zip(instance_ids.iter()) + .map(|(i, id)| (*id, i.str_facility.clone())) + .collect(); + + // First pass: preserve stable assignments + for cluster in all_clusters { + let cluster_json = serde_json::to_string(cluster).expect("Failed to serialize Cluster"); + + if let Some(¤t_instance) = current_map.get(&cluster_json) { + if live_ids.contains(¤t_instance) + && Self::is_facility_eligible(cluster, &instance_facilities, current_instance) + { + assignments.insert(cluster_json, current_instance); + if let Some(count) = instance_count.get_mut(¤t_instance) { + *count += 1; + } + } + } + } + + // Second pass: assign unassigned clusters + for cluster in all_clusters { + let cluster_json = serde_json::to_string(cluster).expect("Failed to serialize Cluster"); + + if assignments.contains_key(&cluster_json) { + continue; // Already assigned in first pass + } + + // Find eligible instances for this cluster's facility + let eligible: Vec = instance_ids + .iter() + .filter(|id| Self::is_facility_eligible(cluster, &instance_facilities, **id)) + .copied() + .collect(); + + if eligible.is_empty() { + warn!( + "No eligible instance for cluster {} (facility_id={})", + cluster, cluster.facility_id + ); + continue; + } + + // Pick the instance with the lowest load + let best = if all_rates_zero { + // Bootstrap: distribute by count / capacity + *eligible + .iter() + .min_by(|a, b| { + let ratio_a = *instance_count.get(a).unwrap_or(&0) as f64 + / capacity_map.get(a).unwrap_or(&1.0); + let ratio_b = *instance_count.get(b).unwrap_or(&0) as f64 + / capacity_map.get(b).unwrap_or(&1.0); + ratio_a + .partial_cmp(&ratio_b) + .unwrap_or(std::cmp::Ordering::Equal) + }) + .unwrap() + } else { + // Rate-based: pick instance with lowest rate/capacity ratio + *eligible + .iter() + .min_by(|a, b| { + let ratio_a = instance_load.get(a).unwrap_or(&0.0) + / capacity_map.get(a).unwrap_or(&1.0); + let ratio_b = instance_load.get(b).unwrap_or(&0.0) + / capacity_map.get(b).unwrap_or(&1.0); + ratio_a + .partial_cmp(&ratio_b) + .unwrap_or(std::cmp::Ordering::Equal) + }) + .unwrap() + }; + + assignments.insert(cluster_json, best); + if let Some(count) = instance_count.get_mut(&best) { + *count += 1; + } + // For rate-based distribution, slightly increase load estimate for + // subsequent assignments within the same cycle + if let Some(load) = instance_load.get_mut(&best) { + *load += 1.0; + } + } + + assignments + } + + /// Checks whether a cluster is eligible to run on a given instance based on 
facility. + fn is_facility_eligible( + cluster: &Cluster, + instance_facilities: &HashMap>, + instance_id: Uuid, + ) -> bool { + match instance_facilities.get(&instance_id) { + Some(Some(facility)) => { + // Instance is scoped to a facility — cluster must match + cluster.facility_id.to_string().to_lowercase() == facility.to_lowercase() + } + _ => { + // Instance has no facility scope — accepts all clusters + true + } + } + } + + /// Applies the computed assignments to the database. + /// Upserts new/changed assignments and deletes removed ones. + async fn apply_assignments( + &self, + dao: &OrchestratorDao, + all_clusters: &[Cluster], + new_assignments: &HashMap, + current_map: &HashMap, + ) -> Result<()> { + // Upsert assignments that are new or changed + for cluster in all_clusters { + let cluster_json = serde_json::to_string(cluster).expect("Failed to serialize Cluster"); + + if let Some(&new_instance) = new_assignments.get(&cluster_json) { + let needs_upsert = match current_map.get(&cluster_json) { + Some(¤t_instance) => current_instance != new_instance, + None => true, + }; + + if needs_upsert { + dao.upsert_assignment(new_instance, cluster) + .await + .into_diagnostic()?; + debug!("Assigned cluster {} to instance {}", cluster, new_instance); + } + } + } + + // Delete assignments for clusters that no longer exist in the database + for cluster_json in current_map.keys() { + if !new_assignments.contains_key(cluster_json) { + // This cluster no longer exists — try to parse and delete + if let Ok(cluster) = serde_json::from_str::(cluster_json) { + dao.delete_assignment_by_cluster(&cluster) + .await + .into_diagnostic()?; + debug!("Removed stale assignment for cluster {}", cluster); + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::collections::{HashMap, HashSet}; + + use uuid::Uuid; + + use crate::cluster::Cluster; + use crate::cluster_key::{Tag, TagType}; + use crate::dao::helpers::parse_uuid; + + use super::{Distributor, InstanceRow}; + + fn make_cluster(facility_id: Uuid, show_id: Uuid, tag: &str) -> Cluster { + Cluster::single_tag( + facility_id, + show_id, + Tag { + name: tag.to_string(), + ttype: TagType::Alloc, + }, + ) + } + + fn make_instance(id: Uuid, facility: Option<&str>, capacity: i32) -> InstanceRow { + InstanceRow { + pk_instance: id.to_string(), + str_name: format!("test:{}", id), + str_facility: facility.map(String::from), + int_capacity: capacity, + float_jobs_queried: 0.0, + b_draining: false, + } + } + + #[test] + fn test_even_distribution_bootstrap() { + let facility = Uuid::new_v4(); + let show = Uuid::new_v4(); + + let clusters: Vec = (0..6) + .map(|i| make_cluster(facility, show, &format!("tag{}", i))) + .collect(); + + let inst_a = Uuid::new_v4(); + let inst_b = Uuid::new_v4(); + let instances = vec![ + make_instance(inst_a, None, 100), + make_instance(inst_b, None, 100), + ]; + + let live_ids: HashSet = instances + .iter() + .map(|i| parse_uuid(&i.pk_instance)) + .collect(); + let rates: HashMap = instances + .iter() + .map(|i| (parse_uuid(&i.pk_instance), 0.0)) + .collect(); + let current_map = HashMap::new(); + + let assignments = Distributor::compute_assignments( + &clusters, + &instances, + ¤t_map, + &live_ids, + &rates, + ); + + // All clusters should be assigned + assert_eq!(assignments.len(), 6); + + // Each instance should get 3 clusters + let count_a = assignments.values().filter(|&&v| v == inst_a).count(); + let count_b = assignments.values().filter(|&&v| v == inst_b).count(); + assert_eq!(count_a, 3); + assert_eq!(count_b, 3); + } 
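+
+     // A companion sketch added for illustration (not in the original
+     // suite), reusing the helpers above: with non-zero rates, an
+     // unassigned cluster should land on the instance with the lower
+     // rate/capacity ratio.
+     #[test]
+     fn test_rate_based_pick_sketch() {
+         let facility = Uuid::new_v4();
+         let show = Uuid::new_v4();
+         let cluster = make_cluster(facility, show, "tag0");
+
+         let inst_a = Uuid::new_v4();
+         let inst_b = Uuid::new_v4();
+         let instances = vec![
+             make_instance(inst_a, None, 100),
+             make_instance(inst_b, None, 100),
+         ];
+
+         let live_ids: HashSet<Uuid> = instances
+             .iter()
+             .map(|i| parse_uuid(&i.pk_instance))
+             .collect();
+         // inst_a is busy (100 jobs/s); inst_b is mostly idle (10 jobs/s)
+         let rates: HashMap<Uuid, f64> =
+             vec![(inst_a, 100.0), (inst_b, 10.0)].into_iter().collect();
+         let current_map = HashMap::new();
+
+         let assignments = Distributor::compute_assignments(
+             &[cluster.clone()],
+             &instances,
+             &current_map,
+             &live_ids,
+             &rates,
+         );
+
+         let key = serde_json::to_string(&cluster).unwrap();
+         assert_eq!(assignments[&key], inst_b);
+     }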
+ + #[test] + fn test_stable_assignments_preserved() { + let facility = Uuid::new_v4(); + let show = Uuid::new_v4(); + + let clusters: Vec = (0..4) + .map(|i| make_cluster(facility, show, &format!("tag{}", i))) + .collect(); + + let inst_a = Uuid::new_v4(); + let inst_b = Uuid::new_v4(); + let instances = vec![ + make_instance(inst_a, None, 100), + make_instance(inst_b, None, 100), + ]; + + let live_ids: HashSet = instances + .iter() + .map(|i| parse_uuid(&i.pk_instance)) + .collect(); + let rates: HashMap = vec![(inst_a, 100.0), (inst_b, 50.0)].into_iter().collect(); + + // Pre-assign all clusters to inst_a + let current_map: HashMap = clusters + .iter() + .map(|c| (serde_json::to_string(c).unwrap(), inst_a)) + .collect(); + + let assignments = Distributor::compute_assignments( + &clusters, + &instances, + ¤t_map, + &live_ids, + &rates, + ); + + // All clusters should stay with inst_a (stability) + for (_, instance) in &assignments { + assert_eq!(*instance, inst_a); + } + } + + #[test] + fn test_facility_affinity() { + let facility_a = Uuid::new_v4(); + let facility_b = Uuid::new_v4(); + let show = Uuid::new_v4(); + + let cluster_a = make_cluster(facility_a, show, "tag_a"); + let cluster_b = make_cluster(facility_b, show, "tag_b"); + let clusters = vec![cluster_a.clone(), cluster_b.clone()]; + + let inst_a = Uuid::new_v4(); + let inst_b = Uuid::new_v4(); + let instances = vec![ + make_instance(inst_a, Some(&facility_a.to_string()), 100), + make_instance(inst_b, Some(&facility_b.to_string()), 100), + ]; + + let live_ids: HashSet = instances + .iter() + .map(|i| parse_uuid(&i.pk_instance)) + .collect(); + let rates: HashMap = instances + .iter() + .map(|i| (parse_uuid(&i.pk_instance), 0.0)) + .collect(); + let current_map = HashMap::new(); + + let assignments = Distributor::compute_assignments( + &clusters, + &instances, + ¤t_map, + &live_ids, + &rates, + ); + + let cluster_a_json = serde_json::to_string(&cluster_a).unwrap(); + let cluster_b_json = serde_json::to_string(&cluster_b).unwrap(); + + assert_eq!(assignments[&cluster_a_json], inst_a); + assert_eq!(assignments[&cluster_b_json], inst_b); + } + + #[test] + fn test_weighted_capacity() { + let facility = Uuid::new_v4(); + let show = Uuid::new_v4(); + + let clusters: Vec = (0..9) + .map(|i| make_cluster(facility, show, &format!("tag{}", i))) + .collect(); + + let inst_a = Uuid::new_v4(); + let inst_b = Uuid::new_v4(); + let instances = vec![ + make_instance(inst_a, None, 200), // 2x capacity + make_instance(inst_b, None, 100), + ]; + + let live_ids: HashSet = instances + .iter() + .map(|i| parse_uuid(&i.pk_instance)) + .collect(); + let rates: HashMap = instances + .iter() + .map(|i| (parse_uuid(&i.pk_instance), 0.0)) + .collect(); + let current_map = HashMap::new(); + + let assignments = Distributor::compute_assignments( + &clusters, + &instances, + ¤t_map, + &live_ids, + &rates, + ); + + let count_a = assignments.values().filter(|&&v| v == inst_a).count(); + let count_b = assignments.values().filter(|&&v| v == inst_b).count(); + + // inst_a should get ~6, inst_b should get ~3 (2:1 ratio) + assert_eq!(count_a, 6); + assert_eq!(count_b, 3); + } +} diff --git a/rust/crates/scheduler/src/orchestrator/instance.rs b/rust/crates/scheduler/src/orchestrator/instance.rs new file mode 100644 index 000000000..3ae23cce6 --- /dev/null +++ b/rust/crates/scheduler/src/orchestrator/instance.rs @@ -0,0 +1,153 @@ +// Copyright Contributors to the OpenCue Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this 
file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +use std::sync::Arc; + +use miette::{IntoDiagnostic, Result}; +use tokio::sync::watch; +use tokio::task::JoinHandle; +use tracing::{error, info, warn}; +use uuid::Uuid; + +use crate::config::CONFIG; +use crate::metrics::JOBS_QUERIED_TOTAL; + +use super::dao::OrchestratorDao; + +/// Manages this scheduler instance's lifecycle in the orchestrator registry. +/// +/// Handles registration, periodic heartbeat updates, and graceful deregistration. +pub struct InstanceManager { + pub instance_id: Uuid, + instance_name: String, + facility: Option, + capacity: i32, + dao: Arc, +} + +impl InstanceManager { + /// Creates a new instance manager and establishes a database connection. + /// + /// Generates a unique instance ID, builds an instance name from hostname and PID, + /// and initializes the orchestrator DAO for database operations. + /// + /// # Arguments + /// + /// * `facility` - Optional facility name to scope this instance to a specific facility + /// + /// # Returns + /// + /// * `Ok(InstanceManager)` - Successfully created instance manager + /// * `Err(miette::Error)` - Failed to establish database connection + pub async fn new(facility: Option) -> Result { + let instance_id = Uuid::new_v4(); + let hostname = gethostname::gethostname().to_string_lossy().to_string(); + let pid = std::process::id(); + let instance_name = format!("{}:{}", hostname, pid); + let capacity = CONFIG.orchestrator.capacity as i32; + let dao = Arc::new(OrchestratorDao::new().await?); + + Ok(InstanceManager { + instance_id, + instance_name, + facility, + capacity, + dao, + }) + } + + /// Registers this instance in the scheduler_instance table. + /// + /// Inserts a row with the instance's ID, name, facility, and capacity so that + /// the leader can discover it during distribution cycles. + /// + /// # Returns + /// + /// * `Ok(())` - Instance registered successfully + /// * `Err(miette::Error)` - Database insert failed + pub async fn register(&self) -> Result<()> { + self.dao + .register_instance( + self.instance_id, + &self.instance_name, + self.facility.as_deref(), + self.capacity, + ) + .await + .into_diagnostic()?; + info!( + instance_id = %self.instance_id, + name = %self.instance_name, + facility = ?self.facility, + capacity = self.capacity, + "Registered scheduler instance" + ); + Ok(()) + } + + /// Starts the heartbeat loop. + /// + /// Spawns a background task that periodically updates this instance's heartbeat + /// timestamp and jobs_queried counter in the database. Runs until the shutdown + /// signal is received. + /// + /// # Arguments + /// + /// * `shutdown` - Watch receiver that signals when the loop should stop + /// + /// # Returns + /// + /// A `JoinHandle` for the spawned heartbeat task. + pub fn start_heartbeat(&self, mut shutdown: watch::Receiver) -> JoinHandle<()> { + let instance_id = self.instance_id; + let dao = self.dao.clone(); + let interval = CONFIG.orchestrator.heartbeat_interval; + + tokio::spawn(async move { + let mut ticker = tokio::time::interval(interval); + loop { + tokio::select! 
{ + _ = ticker.tick() => { + let jobs_queried = JOBS_QUERIED_TOTAL.get(); + if let Err(e) = dao.update_heartbeat(instance_id, jobs_queried).await { + error!(instance_id = %instance_id, "Failed to update heartbeat: {}", e); + } + } + _ = shutdown.changed() => { + info!(instance_id = %instance_id, "Heartbeat loop shutting down"); + break; + } + } + } + }) + } + + /// Gracefully shuts down this instance: marks as draining, then deletes the row. + pub async fn shutdown(&self) { + info!(instance_id = %self.instance_id, "Initiating graceful shutdown"); + + if let Err(e) = self.dao.set_draining(self.instance_id).await { + warn!(instance_id = %self.instance_id, "Failed to set draining: {}", e); + } + + if let Err(e) = self.dao.delete_instance(self.instance_id).await { + warn!(instance_id = %self.instance_id, "Failed to delete instance: {}", e); + } + + info!(instance_id = %self.instance_id, "Instance deregistered"); + } + + /// Returns a reference to the shared orchestrator DAO. + pub fn dao(&self) -> &Arc { + &self.dao + } +} diff --git a/rust/crates/scheduler/src/orchestrator/leader.rs b/rust/crates/scheduler/src/orchestrator/leader.rs new file mode 100644 index 000000000..b7f4af41d --- /dev/null +++ b/rust/crates/scheduler/src/orchestrator/leader.rs @@ -0,0 +1,158 @@ +// Copyright Contributors to the OpenCue Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +use tokio::sync::watch; +use tokio::task::JoinHandle; +use tracing::{error, info, warn}; + +use crate::config::CONFIG; + +use super::dao::OrchestratorDao; +use super::distributor::Distributor; + +/// Well-known advisory lock ID for the orchestrator leader. +/// "OpenCue" encoded as hex digits: 0x4F70656E437565 +const ORCHESTRATOR_LOCK_ID: i64 = 0x4F70656E437565; + +/// Manages leader election via PostgreSQL advisory locks and runs the distribution +/// loop when this instance is the leader. +pub struct LeaderElection { + dao: Arc, + is_leader: Arc, +} + +impl LeaderElection { + /// Creates a new leader election manager. + /// + /// # Arguments + /// + /// * `dao` - Shared orchestrator DAO for advisory lock operations + pub fn new(dao: Arc) -> Self { + LeaderElection { + dao, + is_leader: Arc::new(AtomicBool::new(false)), + } + } + + /// Returns whether this instance is currently the leader. + #[allow(dead_code)] + pub fn is_leader(&self) -> bool { + self.is_leader.load(Ordering::Relaxed) + } + + /// Returns a cloned `Arc` that reflects the current leader status. + /// + /// Useful for sharing the leader flag with other tasks that need to check + /// leadership without holding a reference to the `LeaderElection` struct. + #[allow(dead_code)] + pub fn is_leader_flag(&self) -> Arc { + self.is_leader.clone() + } + + /// Starts the leader election and distribution loop. + /// + /// Continuously tries to acquire the advisory lock. Once acquired, runs the + /// distributor loop. If the lock is lost (e.g., PG connection drops), demotes + /// self and re-enters election. 
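+ ///
+ /// In effect each instance runs a two-state loop: follower to leader when
+ /// `pg_try_advisory_lock` returns true, and leader back to follower when a
+ /// distribution cycle fails or shutdown is signalled.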
+ /// + /// # Arguments + /// + /// * `ignore_tags` - Allocation tags to exclude from cluster loading during distribution + /// * `shutdown` - Watch receiver that signals when the loop should stop + /// + /// # Returns + /// + /// A `JoinHandle` for the spawned election/distribution task. + pub fn start( + &self, + ignore_tags: Vec, + mut shutdown: watch::Receiver, + ) -> JoinHandle<()> { + let dao = self.dao.clone(); + let is_leader = self.is_leader.clone(); + let election_interval = CONFIG.orchestrator.election_interval; + let distribution_interval = CONFIG.orchestrator.distribution_interval; + let failure_threshold = CONFIG.orchestrator.failure_threshold; + + tokio::spawn(async move { + let mut distributor = Distributor::new(); + + loop { + // Check for shutdown + if *shutdown.borrow() { + info!("Leader election loop shutting down"); + break; + } + + if is_leader.load(Ordering::Relaxed) { + // We are the leader — run one distribution cycle + match distributor + .distribute(&dao, &ignore_tags, failure_threshold) + .await + { + Ok(()) => {} + Err(e) => { + error!("Distribution cycle failed: {}", e); + // If distribution fails, it might be a DB issue. + // Demote and re-enter election after interval. + is_leader.store(false, Ordering::Relaxed); + warn!("Demoted from leader due to distribution failure"); + crate::metrics::set_orchestrator_is_leader(false); + } + } + + // Wait for next distribution cycle or shutdown + tokio::select! { + _ = tokio::time::sleep(distribution_interval) => {} + _ = shutdown.changed() => { + info!("Leader loop received shutdown signal"); + break; + } + } + } else { + // Not leader — try to acquire the lock + match dao.try_acquire_leader_lock(ORCHESTRATOR_LOCK_ID).await { + Ok(true) => { + info!("Acquired leader lock — this instance is now the leader"); + is_leader.store(true, Ordering::Relaxed); + crate::metrics::set_orchestrator_is_leader(true); + // Reset distributor state for fresh snapshots + distributor = Distributor::new(); + } + Ok(false) => { + // Another instance holds the lock + } + Err(e) => { + warn!("Failed to attempt leader lock acquisition: {}", e); + } + } + + // Wait before retrying election or shutdown + tokio::select! { + _ = tokio::time::sleep(election_interval) => {} + _ = shutdown.changed() => { + info!("Election loop received shutdown signal"); + break; + } + } + } + } + + // On shutdown, demote + is_leader.store(false, Ordering::Relaxed); + crate::metrics::set_orchestrator_is_leader(false); + }) + } +} diff --git a/rust/crates/scheduler/src/orchestrator/mod.rs b/rust/crates/scheduler/src/orchestrator/mod.rs new file mode 100644 index 000000000..d57f74e0a --- /dev/null +++ b/rust/crates/scheduler/src/orchestrator/mod.rs @@ -0,0 +1,108 @@ +// Copyright Contributors to the OpenCue Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. 
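+
+ //! Overview sketch of how the submodules below fit together: every instance
+ //! registers itself and heartbeats (`instance`), at most one instance holds
+ //! the Postgres advisory lock and runs the distributor (`leader` and
+ //! `distributor`), and every instance polls its own assignments into the
+ //! shared `ClusterFeed` (`sync`).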
+ +pub mod dao; +mod distributor; +mod instance; +mod leader; +mod sync; + +use miette::Result; +use tokio::sync::watch; +use tracing::info; + +use crate::cluster::ClusterFeed; +use crate::config::CONFIG; + +use instance::InstanceManager; +use leader::LeaderElection; +use sync::ClusterSync; + +/// Main entry point for orchestrated mode. +/// +/// Sets up instance registration, heartbeat, leader election, cluster sync, +/// and then runs the scheduling pipeline. On shutdown (SIGTERM/SIGINT), +/// gracefully drains and deregisters. +/// +/// # Arguments +/// +/// * `facility` - Optional facility name to scope this instance to a specific facility +/// * `ignore_tags` - Allocation tags to exclude from cluster loading +/// +/// # Returns +/// +/// * `Ok(())` - Scheduler completed successfully +/// * `Err(miette::Error)` - Fatal error during setup or pipeline execution +pub async fn run(facility: Option, ignore_tags: Vec) -> Result<()> { + // Shutdown signal: send `true` to stop all loops + let (shutdown_tx, shutdown_rx) = watch::channel(false); + + // 1. Register this instance + let instance_mgr = InstanceManager::new(facility).await?; + instance_mgr.register().await?; + + info!( + instance_id = %instance_mgr.instance_id, + "Starting orchestrated mode" + ); + + // 2. Create an empty ClusterFeed — clusters will be populated by the sync loop + let cluster_feed = ClusterFeed::empty(); + + // 3. Start heartbeat loop + let heartbeat_handle = instance_mgr.start_heartbeat(shutdown_rx.clone()); + + // 4. Start leader election + distribution loop + let leader_election = LeaderElection::new(instance_mgr.dao().clone()); + let leader_handle = leader_election.start(ignore_tags, shutdown_rx.clone()); + + // 5. Start cluster sync loop (worker side) + let sync_handle = ClusterSync::start( + instance_mgr.instance_id, + instance_mgr.dao().clone(), + cluster_feed.clone(), + shutdown_rx.clone(), + ); + + // 6. Set up SIGTERM/SIGINT handler for graceful shutdown + let shutdown_tx_clone = shutdown_tx.clone(); + let shutdown_handle = tokio::spawn(async move { + tokio::signal::ctrl_c() + .await + .expect("Failed to listen for ctrl-c"); + info!("Received shutdown signal"); + let _ = shutdown_tx_clone.send(true); + }); + + // 7. Run the pipeline — this blocks until the feed is stopped + let pipeline_result = crate::pipeline::run(cluster_feed).await; + + // 8. Graceful shutdown sequence + info!("Pipeline stopped, initiating shutdown..."); + let _ = shutdown_tx.send(true); + + // Wait for background tasks with a timeout + let timeout = CONFIG.orchestrator.shutdown_timeout; + let _ = tokio::time::timeout(timeout, async { + let _ = heartbeat_handle.await; + let _ = leader_handle.await; + let _ = sync_handle.await; + }) + .await; + + // Deregister this instance + instance_mgr.shutdown().await; + + shutdown_handle.abort(); + + pipeline_result +} diff --git a/rust/crates/scheduler/src/orchestrator/sync.rs b/rust/crates/scheduler/src/orchestrator/sync.rs new file mode 100644 index 000000000..acfacc77b --- /dev/null +++ b/rust/crates/scheduler/src/orchestrator/sync.rs @@ -0,0 +1,106 @@ +// Copyright Contributors to the OpenCue Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +use std::sync::Arc; + +use tokio::sync::watch; +use tokio::task::JoinHandle; +use tracing::{debug, error, info}; +use uuid::Uuid; + +use crate::cluster::{Cluster, ClusterFeed}; +use crate::config::CONFIG; + +use super::dao::OrchestratorDao; + +/// Worker-side cluster synchronization. +/// +/// Periodically polls `scheduler_cluster_assignment` for this instance's assigned clusters +/// and updates the local `ClusterFeed` accordingly. +pub struct ClusterSync; + +impl ClusterSync { + /// Starts the cluster sync polling loop. + /// + /// Polls the database for assigned clusters and updates the ClusterFeed's internal + /// cluster list. The pipeline's round-robin stream automatically picks up the changes. + /// + /// # Arguments + /// + /// * `instance_id` - UUID of this scheduler instance to query assignments for + /// * `dao` - Shared orchestrator DAO for database queries + /// * `cluster_feed` - Shared cluster feed to update with assigned clusters + /// * `shutdown` - Watch receiver that signals when the loop should stop + /// + /// # Returns + /// + /// A `JoinHandle` for the spawned sync polling task. + pub fn start( + instance_id: Uuid, + dao: Arc, + cluster_feed: Arc, + mut shutdown: watch::Receiver, + ) -> JoinHandle<()> { + let poll_interval = CONFIG.orchestrator.poll_interval; + + tokio::spawn(async move { + let mut ticker = tokio::time::interval(poll_interval); + + loop { + tokio::select! { + _ = ticker.tick() => { + match dao.get_assignments_for_instance(instance_id).await { + Ok(assignments) => { + let clusters: Vec = assignments + .into_iter() + .filter_map(|row| { + match serde_json::from_str::(&row.str_cluster) { + Ok(cluster) => Some(cluster), + Err(e) => { + error!( + "Failed to deserialize cluster assignment: {}. 
JSON: {}", + e, row.str_cluster + ); + None + } + } + }) + .collect(); + + let count = clusters.len(); + cluster_feed.update_clusters(clusters); + crate::metrics::set_orchestrator_assigned_clusters(count); + + debug!( + instance_id = %instance_id, + "Synced {} cluster assignment(s)", + count + ); + } + Err(e) => { + error!( + instance_id = %instance_id, + "Failed to poll cluster assignments: {}", + e + ); + } + } + } + _ = shutdown.changed() => { + info!(instance_id = %instance_id, "Cluster sync loop shutting down"); + break; + } + } + } + }) + } +} diff --git a/rust/crates/scheduler/src/pipeline/entrypoint.rs b/rust/crates/scheduler/src/pipeline/entrypoint.rs index 7a4ad034b..b26783451 100644 --- a/rust/crates/scheduler/src/pipeline/entrypoint.rs +++ b/rust/crates/scheduler/src/pipeline/entrypoint.rs @@ -40,7 +40,7 @@ use crate::pipeline::MatchingService; /// /// * `Ok(())` - Scheduler completed successfully /// * `Err(miette::Error)` - Fatal error occurred during processing -pub async fn run(cluster_feed: ClusterFeed) -> miette::Result<()> { +pub async fn run(cluster_feed: Arc) -> miette::Result<()> { let job_fetcher = Arc::new(JobDao::new().await?); let matcher = Arc::new(MatchingService::new().await?); let cycles_without_jobs = Arc::new(AtomicUsize::new(0)); diff --git a/rust/crates/scheduler/tests/smoke_tests.rs b/rust/crates/scheduler/tests/smoke_tests.rs index 8f1af6c32..7b9d4e845 100644 --- a/rust/crates/scheduler/tests/smoke_tests.rs +++ b/rust/crates/scheduler/tests/smoke_tests.rs @@ -48,7 +48,7 @@ mod scheduler_smoke_test { use scheduler::{ cluster::{Cluster, ClusterFeed}, - cluster_key::{ClusterKey, Tag, TagType}, + cluster_key::{Tag, TagType}, pipeline, }; use tracing::info; @@ -831,16 +831,16 @@ mod scheduler_smoke_test { async fn test_dispatch_hostname_tag_flow_inner(test_data: TestData) { // Create a specific cluster feed for HOSTNAME tag testing - let hostname_cluster = Cluster::ComposedKey(ClusterKey { - facility_id: test_data.facility_id.to_string(), - show_id: test_data.show_id.to_string(), - tag: Tag { + let hostname_cluster = Cluster::single_tag( + test_data.facility_id, + test_data.show_id, + Tag { name: format!("integ_test_hostname_tag_{}", test_data.test_suffix), ttype: TagType::HostName, }, - }); + ); - let cluster_feed = ClusterFeed::load_from_clusters(vec![hostname_cluster], &[]); + let cluster_feed = ClusterFeed::from_clusters(vec![hostname_cluster], &[]); info!("Starting HOSTNAME tag integration test..."); @@ -887,16 +887,16 @@ mod scheduler_smoke_test { async fn test_dispatch_alloc_tag_flow_inner(test_data: TestData) { // Create a specific cluster feed for ALLOC tag testing - let alloc_cluster = Cluster::ComposedKey(ClusterKey { - facility_id: test_data.facility_id.to_string(), - show_id: test_data.show_id.to_string(), - tag: Tag { + let alloc_cluster = Cluster::single_tag( + test_data.facility_id, + test_data.show_id, + Tag { name: format!("integ_test_alloc_tag_{}", test_data.test_suffix), ttype: TagType::Alloc, }, - }); + ); - let cluster_feed = ClusterFeed::load_from_clusters(vec![alloc_cluster], &[]); + let cluster_feed = ClusterFeed::from_clusters(vec![alloc_cluster], &[]); info!("Starting ALLOC tag integration test..."); @@ -934,12 +934,16 @@ mod scheduler_smoke_test { async fn test_dispatch_manual_tag_flow_inner(test_data: TestData) { // Create a cluster feed with MANUAL tags (chunked) - let manual_cluster = Cluster::TagsKey(vec![Tag { - name: format!("integ_test_manual_tag_{}", test_data.test_suffix), - ttype: TagType::Manual, - }]); + let 
manual_cluster = Cluster::multiple_tag( + test_data.facility_id, + test_data.show_id, + vec![Tag { + name: format!("integ_test_manual_tag_{}", test_data.test_suffix), + ttype: TagType::Manual, + }], + ); - let cluster_feed = ClusterFeed::load_from_clusters(vec![manual_cluster], &[]); + let cluster_feed = ClusterFeed::from_clusters(vec![manual_cluster], &[]); info!("Starting MANUAL tag integration test..."); let frame_count = test_data.num_frames(); @@ -977,29 +981,33 @@ mod scheduler_smoke_test { async fn test_dispatch_mixed_job_scenario_inner(test_data: TestData) { // Create multiple clusters to handle the mixed job with different tag types let clusters = vec![ - Cluster::ComposedKey(ClusterKey { - facility_id: test_data.facility_id.to_string(), - show_id: test_data.show_id.to_string(), - tag: Tag { + Cluster::single_tag( + test_data.facility_id, + test_data.show_id, + Tag { name: format!("integ_test_hostname_tag_{}", test_data.test_suffix), ttype: TagType::HostName, }, - }), - Cluster::ComposedKey(ClusterKey { - facility_id: test_data.facility_id.to_string(), - show_id: test_data.show_id.to_string(), - tag: Tag { + ), + Cluster::single_tag( + test_data.facility_id, + test_data.show_id, + Tag { name: format!("integ_test_alloc_tag_{}", test_data.test_suffix), ttype: TagType::Alloc, }, - }), - Cluster::TagsKey(vec![Tag { - name: format!("integ_test_manual_tag_{}", test_data.test_suffix), - ttype: TagType::Manual, - }]), + ), + Cluster::multiple_tag( + test_data.facility_id, + test_data.show_id, + vec![Tag { + name: format!("integ_test_manual_tag_{}", test_data.test_suffix), + ttype: TagType::Manual, + }], + ), ]; - let cluster_feed = ClusterFeed::load_from_clusters(clusters, &[]); + let cluster_feed = ClusterFeed::from_clusters(clusters, &[]); info!("Starting mixed job scenario integration test..."); @@ -1042,14 +1050,18 @@ mod scheduler_smoke_test { assert_ok!(result, "Failure at test wrapper") } - async fn test_dispatcher_no_matching_hosts_inner(_test_data: TestData) { + async fn test_dispatcher_no_matching_hosts_inner(test_data: TestData) { // Create a cluster with a non-existent tag that won't match any hosts - let non_matching_cluster = Cluster::TagsKey(vec![Tag { - name: "non_existent_tag".to_string(), - ttype: TagType::Manual, - }]); + let non_matching_cluster = Cluster::multiple_tag( + test_data.facility_id, + test_data.show_id, + vec![Tag { + name: "non_existent_tag".to_string(), + ttype: TagType::Manual, + }], + ); - let cluster_feed = ClusterFeed::load_from_clusters(vec![non_matching_cluster], &[]); + let cluster_feed = ClusterFeed::from_clusters(vec![non_matching_cluster], &[]); info!("Starting no matching hosts integration test..."); diff --git a/rust/crates/scheduler/tests/stress_tests.rs b/rust/crates/scheduler/tests/stress_tests.rs index ad47917ea..8047eaec1 100644 --- a/rust/crates/scheduler/tests/stress_tests.rs +++ b/rust/crates/scheduler/tests/stress_tests.rs @@ -89,7 +89,7 @@ mod stress_test { let test_data = assert_ok!(setup(&desc).await); let cluster_len = test_data.clusters.len(); - let cluster_feed = ClusterFeed::load_from_clusters(test_data.clusters, &[]); + let cluster_feed = ClusterFeed::from_clusters(test_data.clusters, &[]); info!( "Starting Small stress test {} - cluster size: {:?}", test_data.test_prefix, cluster_len diff --git a/rust/crates/scheduler/tests/util.rs b/rust/crates/scheduler/tests/util.rs index 6e96fb765..c6c425053 100644 --- a/rust/crates/scheduler/tests/util.rs +++ b/rust/crates/scheduler/tests/util.rs @@ -123,6 +123,7 @@ pub fn 
create_test_config() -> Config {
         },
         host_cache: host_cache_config,
         scheduler: SchedulerConfig::default(),
+        orchestrator: Default::default(),
     }
 }

From 67090669f89a6b4c466e2e493bd8ffb21fba5934 Mon Sep 17 00:00:00 2001
From: Diego Tavares
Date: Tue, 3 Mar 2026 18:45:27 -0800
Subject: [PATCH 02/16] Add assignment expiration and refactor

The orchestrator needs to expire assignments to allow rebalancing of the
clusters when new instances join the system.

Change cluster_id logic so that changing cluster tags does not trigger a
cluster to be reassigned. The new logic computes a cluster_id with
facility_id:show_id:type:tag for alloc clusters and
facility_id:show_id:type:chunk_index for chunked tags (manual, hostname,
and hardware).

Entire-Checkpoint: d04a08f3d901
---
 ...V38__Add_scheduler_orchestrator_tables.sql |  13 +-
 rust/crates/scheduler/src/cluster.rs          | 110 ++-
 rust/crates/scheduler/src/cluster_key.rs      |  11 +
 rust/crates/scheduler/src/config/mod.rs       |   6 +
 rust/crates/scheduler/src/main.rs             |   2 +-
 rust/crates/scheduler/src/orchestrator/dao.rs |  62 +-
 .../scheduler/src/orchestrator/distributor.rs | 678 +++++++++++++-----
 .../scheduler/src/orchestrator/leader.rs      |   6 +
 .../crates/scheduler/src/orchestrator/sync.rs |   4 +-
 rust/crates/scheduler/tests/util.rs           |   2 +-
 10 files changed, 660 insertions(+), 234 deletions(-)

diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V38__Add_scheduler_orchestrator_tables.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V38__Add_scheduler_orchestrator_tables.sql
index 097b59077..23ec014ff 100644
--- a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V38__Add_scheduler_orchestrator_tables.sql
+++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V38__Add_scheduler_orchestrator_tables.sql
@@ -14,12 +14,13 @@ CREATE TABLE scheduler_instance (
 CREATE INDEX idx_scheduler_instance_heartbeat ON scheduler_instance(ts_heartbeat);
 
 CREATE TABLE scheduler_cluster_assignment (
-    pk_assignment UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-    pk_instance UUID NOT NULL REFERENCES scheduler_instance(pk_instance) ON DELETE CASCADE,
-    str_cluster TEXT NOT NULL,
-    int_version INTEGER NOT NULL DEFAULT 0,
-    ts_assigned TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-    UNIQUE(str_cluster)
+    pk_assignment UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    pk_instance UUID NOT NULL REFERENCES scheduler_instance(pk_instance) ON DELETE CASCADE,
+    str_cluster_id TEXT NOT NULL,
+    str_cluster_json TEXT NOT NULL,
+    int_version INTEGER NOT NULL DEFAULT 0,
+    ts_assigned TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    UNIQUE(str_cluster_id)
 );
 
 CREATE INDEX idx_sca_instance ON scheduler_cluster_assignment(pk_instance);
diff --git a/rust/crates/scheduler/src/cluster.rs b/rust/crates/scheduler/src/cluster.rs
index 8ffb347a2..f30591491 100644
--- a/rust/crates/scheduler/src/cluster.rs
+++ b/rust/crates/scheduler/src/cluster.rs
@@ -11,7 +11,7 @@
 // the License.
 
 use std::{
-    collections::{BTreeSet, HashMap, HashSet},
+    collections::{BTreeMap, BTreeSet, HashMap, HashSet},
     sync::{
         atomic::{AtomicBool, AtomicUsize, Ordering},
         Arc, Mutex, RwLock,
@@ -37,6 +37,9 @@ pub static CLUSTER_ROUNDS: AtomicUsize = AtomicUsize::new(0);
 
 #[derive(Serialize, Deserialize, Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
 pub struct Cluster {
+    /// Stable identity key, independent of tag content for chunked clusters.
+ /// Format: "{facility_id}:{show_id}:{tag_type}:{tag_name_or_chunk_index}" + pub id: String, pub facility_id: Uuid, pub show_id: Uuid, pub tags: BTreeSet, @@ -44,32 +47,65 @@ pub struct Cluster { impl std::fmt::Display for Cluster { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}:{}:{}", - self.facility_id, - self.show_id, - self.tags.iter().join(",") - ) + write!(f, "{}", self.id) } } impl Cluster { pub fn single_tag(facility_id: Uuid, show_id: Uuid, tag: Tag) -> Self { + let id = format!( + "{}:{}:{}:{}", + facility_id, + show_id, + tag.ttype.as_str(), + tag.name + ); Cluster { + id, facility_id, show_id, tags: BTreeSet::from([tag]), } } - pub fn multiple_tag(facility_id: Uuid, show_id: Uuid, tags: Vec) -> Self { + /// Creates a cluster for a batch of tags loaded from the DB, identified by chunk index. + /// The ID is position-based, so tag content can change without affecting identity. + pub fn chunked( + facility_id: Uuid, + show_id: Uuid, + tag_type: &str, + chunk_index: usize, + tags: Vec, + ) -> Self { + let id = format!("{}:{}:{}:{}", facility_id, show_id, tag_type, chunk_index); Cluster { + id, facility_id, show_id, tags: tags.into_iter().collect(), } } + + /// Creates a cluster from explicitly provided tags (e.g. CLI arguments). + /// The ID is derived from the sorted tag names. + pub fn from_tags(facility_id: Uuid, show_id: Uuid, tags: Vec) -> Self { + let tag_type = tags.first().map_or("unknown", |t| t.ttype.as_str()); + let sorted_tags: BTreeSet = tags.into_iter().collect(); + let tag_names: Vec<&str> = sorted_tags.iter().map(|t| t.name.as_str()).collect(); + let id = format!( + "{}:{}:{}:{}", + facility_id, + show_id, + tag_type, + tag_names.join(",") + ); + Cluster { + id, + facility_id, + show_id, + tags: sorted_tags, + } + } } #[derive(Debug)] @@ -186,6 +222,7 @@ impl ClusterFeed { /// /// Applies ignore-tag filtering, then wraps the result in an `Arc`. /// Useful in tests and scenarios where clusters are known upfront. 
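// A minimal sketch of the identity rules above (an editorial example, not
// part of the patch; `mk_tag` is a made-up helper):
//
//     fn mk_tag(name: &str) -> Tag {
//         Tag { name: name.to_string(), ttype: TagType::Manual }
//     }
//
//     let (facility, show) = (Uuid::new_v4(), Uuid::new_v4());
//
//     // A chunked cluster keeps its ID when the tags inside the chunk change:
//     let v1 = Cluster::chunked(facility, show, "manual", 0, vec![mk_tag("desk_a")]);
//     let v2 = Cluster::chunked(facility, show, "manual", 0, vec![mk_tag("desk_b")]);
//     assert_eq!(v1.id, v2.id); // both "{facility}:{show}:manual:0"
//
//     // An alloc cluster is keyed by its tag name, so its identity follows the tag:
//     let gpu = Tag { name: "gpu_farm".to_string(), ttype: TagType::Alloc };
//     let a = Cluster::single_tag(facility, show, gpu);
//     assert!(a.id.ends_with(":alloc:gpu_farm"));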
+ #[allow(dead_code)] pub fn from_clusters(clusters: Vec, ignore_tags: &[String]) -> Arc { let clusters = Self::filter_clusters(clusters, ignore_tags); Arc::new(ClusterFeed { @@ -260,9 +297,9 @@ impl ClusterFeed { .fetch_alloc_clusters(facility_id, shows_filter.clone()) .chain(cluster_dao.fetch_non_alloc_clusters(facility_id, shows_filter)); let mut clusters = Vec::new(); - let mut manual_tags: HashMap<(Uuid, Uuid), HashSet> = HashMap::new(); - let mut hardware_tags: HashMap<(Uuid, Uuid), HashSet> = HashMap::new(); - let mut hostname_tags: HashMap<(Uuid, Uuid), HashSet> = HashMap::new(); + let mut manual_tags: BTreeMap<(Uuid, Uuid), BTreeSet> = BTreeMap::new(); + let mut hardware_tags: BTreeMap<(Uuid, Uuid), BTreeSet> = BTreeMap::new(); + let mut hostname_tags: BTreeMap<(Uuid, Uuid), BTreeSet> = BTreeMap::new(); // Collect all tags while let Some(record) = clusters_stream.next().await { @@ -276,7 +313,7 @@ impl ClusterFeed { let facility_id = parse_uuid(&cluster.facility_id); let show_id = parse_uuid(&cluster.show_id); match cluster.ttype.as_str() { - // Each alloc tag becomes its own cluster + // Each alloc tag becomes its own cluster with a stable ID "ALLOC" => { clusters.push(Cluster::single_tag( facility_id, @@ -322,31 +359,54 @@ impl ClusterFeed { } } - // Chunk Manual tags + // Chunk Manual tags (BTreeSet ensures deterministic sorted order) for ((show_id, facility_id), tags) in manual_tags.into_iter() { - for chunk in &tags.into_iter().chunks(CONFIG.queue.manual_tags_chunk_size) { - clusters.push(Cluster::multiple_tag(facility_id, show_id, chunk.collect())) + for (idx, chunk) in (&tags.into_iter().chunks(CONFIG.queue.manual_tags_chunk_size)) + .into_iter() + .enumerate() + { + clusters.push(Cluster::chunked( + facility_id, + show_id, + "manual", + idx, + chunk.collect(), + )) } } - // Chunk Hostname tags + // Chunk Hostname tags (BTreeSet ensures deterministic sorted order) for ((show_id, facility_id), tags) in hostname_tags.into_iter() { - for chunk in &tags + for (idx, chunk) in (&tags + .into_iter() + .chunks(CONFIG.queue.hostname_tags_chunk_size)) .into_iter() - .chunks(CONFIG.queue.hostname_tags_chunk_size) + .enumerate() { - clusters.push(Cluster::multiple_tag(facility_id, show_id, chunk.collect())) + clusters.push(Cluster::chunked( + facility_id, + show_id, + "hostname", + idx, + chunk.collect(), + )) } } - // Chunk Hardware tags + // Chunk Hardware tags (BTreeSet ensures deterministic sorted order) + // Hardware shares the same chunk size as manual to simplify configuration for ((show_id, facility_id), tags) in hardware_tags.into_iter() { - for chunk in &tags + for (idx, chunk) in (&tags.into_iter().chunks(CONFIG.queue.manual_tags_chunk_size)) .into_iter() - // Hardware share the same size as manual to simplify configuration - .chunks(CONFIG.queue.manual_tags_chunk_size) + .enumerate() { - clusters.push(Cluster::multiple_tag(facility_id, show_id, chunk.collect())) + clusters.push(Cluster::chunked( + facility_id, + show_id, + "hardware", + idx, + chunk.collect(), + )) } } diff --git a/rust/crates/scheduler/src/cluster_key.rs b/rust/crates/scheduler/src/cluster_key.rs index 0aaafeb42..dcdc4dc50 100644 --- a/rust/crates/scheduler/src/cluster_key.rs +++ b/rust/crates/scheduler/src/cluster_key.rs @@ -21,6 +21,17 @@ pub enum TagType { Hardware, } +impl TagType { + pub fn as_str(&self) -> &'static str { + match self { + TagType::Alloc => "alloc", + TagType::HostName => "hostname", + TagType::Manual => "manual", + TagType::Hardware => "hardware", + } + } +} + #[derive(Serialize, 
Deserialize, Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct Tag { pub name: String, diff --git a/rust/crates/scheduler/src/config/mod.rs b/rust/crates/scheduler/src/config/mod.rs index 0010de6be..fd3769803 100644 --- a/rust/crates/scheduler/src/config/mod.rs +++ b/rust/crates/scheduler/src/config/mod.rs @@ -288,6 +288,11 @@ pub struct OrchestratorConfig { /// Graceful shutdown timeout before force-killing in-flight work (default: 30s) #[serde(with = "humantime_serde")] pub shutdown_timeout: Duration, + + /// How long a cluster assignment is preserved before becoming eligible for + /// redistribution. Prevents new instances from remaining idle. (default: 120s) + #[serde(with = "humantime_serde")] + pub assignment_ttl: Duration, } impl Default for OrchestratorConfig { @@ -300,6 +305,7 @@ impl Default for OrchestratorConfig { election_interval: Duration::from_secs(10), capacity: 100, shutdown_timeout: Duration::from_secs(30), + assignment_ttl: Duration::from_secs(120), } } } diff --git a/rust/crates/scheduler/src/main.rs b/rust/crates/scheduler/src/main.rs index 19c3eaf65..98f5a6eb5 100644 --- a/rust/crates/scheduler/src/main.rs +++ b/rust/crates/scheduler/src/main.rs @@ -216,7 +216,7 @@ impl JobQueueCli { let show_id = cluster::get_show_id(&manual_tag.show) .await .wrap_err(format!("Could not find show {}.", manual_tag.show))?; - clusters.push(Cluster::multiple_tag( + clusters.push(Cluster::from_tags( *facility_id, show_id, manual_tag diff --git a/rust/crates/scheduler/src/orchestrator/dao.rs b/rust/crates/scheduler/src/orchestrator/dao.rs index 81760fa5b..43b765103 100644 --- a/rust/crates/scheduler/src/orchestrator/dao.rs +++ b/rust/crates/scheduler/src/orchestrator/dao.rs @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under // the License. 
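// A small editorial sketch of the gate that `assignment_ttl` above drives,
// mirroring the distributor's filter_expired_assignments() further below;
// the function name here is illustrative, not from the patch:
//
//     use std::time::{Duration, Instant};
//
//     fn eligible_for_redistribution(assigned_at: Instant, ttl: Duration) -> bool {
//         assigned_at.elapsed() >= ttl
//     }
//
// With the 120s default, a cluster pinned when an instance joins becomes
// movable two minutes later, so a newly joined instance picks up work
// without any manual rebalance step.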
+use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; @@ -17,6 +18,8 @@ use miette::{IntoDiagnostic, Result}; use sqlx::{Pool, Postgres}; use uuid::Uuid; +use crate::dao::helpers::parse_uuid; + use crate::cluster::Cluster; use crate::pgpool::connection_pool; @@ -41,7 +44,8 @@ pub struct InstanceRow { pub struct ClusterAssignmentRow { pub pk_assignment: String, pub pk_instance: String, - pub str_cluster: String, + pub str_cluster_id: String, + pub str_cluster_json: String, pub int_version: i32, } @@ -85,26 +89,26 @@ WHERE ts_heartbeat >= NOW() - $1::interval // --- Cluster assignment queries --- static QUERY_ASSIGNMENTS_FOR_INSTANCE: &str = r#" -SELECT pk_assignment, pk_instance, str_cluster, int_version +SELECT pk_assignment, pk_instance, str_cluster_id, str_cluster_json, int_version FROM scheduler_cluster_assignment WHERE pk_instance = $1 "#; static QUERY_ALL_ASSIGNMENTS: &str = r#" -SELECT pk_assignment, pk_instance, str_cluster, int_version +SELECT pk_assignment, pk_instance, str_cluster_id, str_cluster_json, int_version FROM scheduler_cluster_assignment "#; static UPSERT_ASSIGNMENT: &str = r#" -INSERT INTO scheduler_cluster_assignment (pk_instance, str_cluster, int_version, ts_assigned) -VALUES ($1, $2, 0, NOW()) -ON CONFLICT (str_cluster) -DO UPDATE SET pk_instance = $1, int_version = scheduler_cluster_assignment.int_version + 1, ts_assigned = NOW() +INSERT INTO scheduler_cluster_assignment (pk_instance, str_cluster_id, str_cluster_json, int_version, ts_assigned) +VALUES ($1, $2, $3, 0, NOW()) +ON CONFLICT (str_cluster_id) +DO UPDATE SET pk_instance = $1, str_cluster_json = $3, int_version = scheduler_cluster_assignment.int_version + 1, ts_assigned = NOW() "#; -static DELETE_ASSIGNMENT_BY_CLUSTER: &str = r#" +static DELETE_ASSIGNMENT_BY_CLUSTER_ID: &str = r#" DELETE FROM scheduler_cluster_assignment -WHERE str_cluster = $1 +WHERE str_cluster_id = $1 "#; // --- Advisory lock --- @@ -187,12 +191,16 @@ impl OrchestratorDao { pub async fn get_live_instances( &self, failure_threshold: Duration, - ) -> Result, sqlx::Error> { + ) -> Result, sqlx::Error> { let interval = format!("{} seconds", failure_threshold.as_secs()); - sqlx::query_as::<_, InstanceRow>(QUERY_LIVE_INSTANCES) + let rows = sqlx::query_as::<_, InstanceRow>(QUERY_LIVE_INSTANCES) .bind(interval) .fetch_all(&*self.connection_pool) - .await + .await?; + Ok(rows + .into_iter() + .map(|r| (parse_uuid(&r.pk_instance), r)) + .collect()) } // --- Cluster assignment operations --- @@ -207,14 +215,22 @@ impl OrchestratorDao { .await } - pub async fn get_all_assignments(&self) -> Result, sqlx::Error> { - sqlx::query_as::<_, ClusterAssignmentRow>(QUERY_ALL_ASSIGNMENTS) + /// Returns all cluster assignments as a map of cluster ID to assigned instance ID. + /// + /// The key is the stable cluster ID (e.g. `"{facility}:{show}:alloc:{tag}"`), + /// which is independent of the exact tag content for chunked clusters. + pub async fn get_all_assignments(&self) -> Result, sqlx::Error> { + let rows = sqlx::query_as::<_, ClusterAssignmentRow>(QUERY_ALL_ASSIGNMENTS) .fetch_all(&*self.connection_pool) - .await + .await?; + Ok(rows + .into_iter() + .map(|r| (r.str_cluster_id, parse_uuid(&r.pk_instance))) + .collect()) } - /// Upserts a cluster assignment. If the cluster is already assigned, updates the instance - /// and bumps the version. + /// Upserts a cluster assignment. If the cluster is already assigned, updates the instance, + /// refreshes the JSON content, and bumps the version. 
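// Sketch of the resulting version semantics (editorial; assumes a dao wired
// to a test database, with `instance_a`/`instance_b` as placeholder UUIDs):
//
//     dao.upsert_assignment(instance_a, &cluster).await?; // inserts, int_version = 0
//     dao.upsert_assignment(instance_b, &cluster).await?; // re-owns,  int_version = 1
//
// The UNIQUE(str_cluster_id) constraint guarantees one row per cluster: a
// conflict rewrites pk_instance and str_cluster_json instead of inserting.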
pub async fn upsert_assignment( &self, instance_id: Uuid, @@ -224,17 +240,19 @@ impl OrchestratorDao { serde_json::to_string(cluster).expect("Failed to serialize Cluster to JSON"); sqlx::query(UPSERT_ASSIGNMENT) .bind(instance_id.to_string()) + .bind(&cluster.id) .bind(cluster_json) .execute(&*self.connection_pool) .await?; Ok(()) } - pub async fn delete_assignment_by_cluster(&self, cluster: &Cluster) -> Result<(), sqlx::Error> { - let cluster_json = - serde_json::to_string(cluster).expect("Failed to serialize Cluster to JSON"); - sqlx::query(DELETE_ASSIGNMENT_BY_CLUSTER) - .bind(cluster_json) + pub async fn delete_assignment_by_cluster_id( + &self, + cluster_id: &str, + ) -> Result<(), sqlx::Error> { + sqlx::query(DELETE_ASSIGNMENT_BY_CLUSTER_ID) + .bind(cluster_id) .execute(&*self.connection_pool) .await?; Ok(()) diff --git a/rust/crates/scheduler/src/orchestrator/distributor.rs b/rust/crates/scheduler/src/orchestrator/distributor.rs index cb9404ca8..1257a7a80 100644 --- a/rust/crates/scheduler/src/orchestrator/distributor.rs +++ b/rust/crates/scheduler/src/orchestrator/distributor.rs @@ -18,7 +18,7 @@ use tracing::{debug, info, warn}; use uuid::Uuid; use crate::cluster::{Cluster, ClusterFeed}; -use crate::dao::helpers::parse_uuid; +use crate::config::CONFIG; use super::dao::{InstanceRow, OrchestratorDao}; @@ -34,6 +34,9 @@ struct RateSnapshot { pub struct Distributor { /// Previous snapshots of each instance's jobs_queried counter, keyed by instance ID. previous_snapshots: HashMap, + /// Tracks when each cluster assignment was created or last renewed, keyed by cluster ID. + /// Used for TTL-based expiration to enable redistribution when new instances join. + assignment_ages: HashMap, } impl Distributor { @@ -44,9 +47,57 @@ impl Distributor { pub fn new() -> Self { Distributor { previous_snapshots: HashMap::new(), + assignment_ages: HashMap::new(), } } + /// Seeds assignment ages from existing database assignments. + /// + /// Called on leader promotion to give existing assignments a full TTL grace period + /// before they become eligible for redistribution. This prevents a thundering-herd + /// redistribution when a new leader takes over. + pub fn seed_ages(&mut self, current_assignments: &HashMap) { + let now = Instant::now(); + self.assignment_ages = current_assignments + .keys() + .map(|cluster_id| (cluster_id.clone(), now)) + .collect(); + } + + /// Filters out assignments whose age exceeds the configured TTL. + /// + /// Expired assignments appear "unassigned" to `compute_assignments`, causing them + /// to go through the load-balanced second pass. Assignments with no tracked age + /// (e.g. pre-existing before the TTL feature) are preserved. + fn filter_expired_assignments( + &self, + current_assignments: &HashMap, + now: Instant, + ) -> HashMap { + let assignment_ttl = CONFIG.orchestrator.assignment_ttl; + current_assignments + .iter() + .filter(|(cluster_id, _)| match self.assignment_ages.get(*cluster_id) { + Some(created_at) => { + let age = now.duration_since(*created_at); + if age >= assignment_ttl { + debug!( + "Assignment for cluster {} expired (age: {:.1}s, ttl: {:.1}s)", + cluster_id, + age.as_secs_f64(), + assignment_ttl.as_secs_f64() + ); + false + } else { + true + } + } + None => true, + }) + .map(|(k, v)| (k.clone(), *v)) + .collect() + } + /// Runs one distribution cycle: loads clusters, reads instances, computes assignments. 
/// /// Cleans up dead instances, loads all clusters from the database, reads live @@ -69,7 +120,7 @@ impl Distributor { ignore_tags: &[String], failure_threshold: Duration, ) -> Result<()> { - // 1. Clean up dead instances (cascade deletes their assignments) + // Clean up dead instances (cascade deletes their assignments) let dead = dao .delete_dead_instances(failure_threshold) .await @@ -82,16 +133,15 @@ impl Distributor { } } - // 2. Load all clusters from the database + // Load all clusters from the database let all_clusters = ClusterFeed::load_clusters(None, ignore_tags, None).await?; - let all_clusters = ClusterFeed::filter_clusters(all_clusters, ignore_tags); if all_clusters.is_empty() { debug!("No clusters found in database"); return Ok(()); } - // 3. Read live instances + // Read live instances let instances = dao .get_live_instances(failure_threshold) .await @@ -102,68 +152,66 @@ impl Distributor { return Ok(()); } - // 4. Read current assignments + // Read current assignments (cluster_id -> instance_id) let current_assignments = dao.get_all_assignments().await.into_diagnostic()?; - // Build a map: cluster_json -> currently assigned instance_id - let mut current_map: HashMap = HashMap::new(); - for assignment in ¤t_assignments { - current_map.insert( - assignment.str_cluster.clone(), - parse_uuid(&assignment.pk_instance), - ); - } - - // Build set of live instance IDs for quick lookup - let live_ids: std::collections::HashSet = instances - .iter() - .map(|i| parse_uuid(&i.pk_instance)) - .collect(); + // Compute job query rates per instance (enriches instances with their rate) + let rated_instances = self.compute_rates(instances); - // 5. Compute job query rates per instance let now = Instant::now(); - let rates = self.compute_rates(&instances, now); + let active_assignments = self.filter_expired_assignments(¤t_assignments, now); - // 6. Compute new assignments + // Compute new assignments using filtered (non-expired) assignments let new_assignments = - Self::compute_assignments(&all_clusters, &instances, ¤t_map, &live_ids, &rates); + Self::compute_assignments(&all_clusters, &rated_instances, &active_assignments); + + // Update assignment ages: + // - New or changed assignments get a fresh timestamp + // - Unchanged assignments keep their existing age + // - Removed clusters get cleaned up + let valid_cluster_ids: std::collections::HashSet<&String> = + new_assignments.keys().collect(); + self.assignment_ages + .retain(|cluster_id, _| valid_cluster_ids.contains(cluster_id)); + for (cluster_id, &new_instance) in &new_assignments { + let is_new_or_changed = match active_assignments.get(cluster_id) { + Some(&prev_instance) => prev_instance != new_instance, + None => true, + }; + if is_new_or_changed { + self.assignment_ages.insert(cluster_id.clone(), now); + } + } - // 7. Apply assignment changes to the database - self.apply_assignments(dao, &all_clusters, &new_assignments, ¤t_map) + // Apply assignment changes to the database (uses original current_assignments + // to detect what actually changed in the DB) + self.apply_assignments(dao, &all_clusters, &new_assignments, ¤t_assignments) .await?; - // 8. Update snapshots for next cycle - for instance in &instances { - self.previous_snapshots.insert( - parse_uuid(&instance.pk_instance), - RateSnapshot { - jobs_queried: instance.float_jobs_queried, - timestamp: now, - }, - ); - } - - // 9. 
Update metrics - crate::metrics::set_orchestrator_instances_alive(instances.len()); + // Update metrics + crate::metrics::set_orchestrator_instances_alive(rated_instances.len()); crate::metrics::increment_orchestrator_rebalance(); debug!( "Distribution complete: {} clusters across {} instances", all_clusters.len(), - instances.len() + rated_instances.len() ); Ok(()) } /// Computes the job query rate for each instance based on the delta from previous snapshots. - /// Returns a map of instance_id -> rate (jobs/second). + /// Returns a map of instance_id -> (InstanceRow, rate) where rate is jobs/second. /// If no previous snapshot exists (bootstrap), rate is 0.0. - fn compute_rates(&self, instances: &[InstanceRow], now: Instant) -> HashMap { - let mut rates = HashMap::new(); + fn compute_rates( + &mut self, + instances: HashMap, + ) -> HashMap { + let now = Instant::now(); + let mut rated_instances = HashMap::new(); - for instance in instances { - let id = parse_uuid(&instance.pk_instance); + for (id, instance) in instances { let rate = if let Some(prev) = self.previous_snapshots.get(&id) { let delta_jobs = instance.float_jobs_queried - prev.jobs_queried; let delta_secs = now.duration_since(prev.timestamp).as_secs_f64(); @@ -177,67 +225,64 @@ impl Distributor { 0.0 }; - rates.insert(id, rate); + // Update snapshots for next cycle + self.previous_snapshots.insert( + id, + RateSnapshot { + jobs_queried: instance.float_jobs_queried, + timestamp: Instant::now(), + }, + ); + rated_instances.insert(id, (instance, rate)); } - rates + rated_instances } /// Pure function that computes the optimal cluster-to-instance assignment. /// /// Strategy: - /// 1. Preserve stable assignments (cluster stays on same live instance if eligible). + /// 1. Preserve stable assignments (cluster stays on same live instance). /// 2. Assign unassigned clusters to the instance with the lowest load ratio. 
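// A worked example of the ratio rule with invented numbers:
//
//     // Instance A: rate 100 jobs/s, capacity 200 -> ratio 0.5
//     // Instance B: rate  30 jobs/s, capacity 100 -> ratio 0.3
//     let (ratio_a, ratio_b) = (100.0_f64 / 200.0, 30.0_f64 / 100.0);
//     assert!(ratio_b < ratio_a); // B wins the next unassigned cluster
//
// At bootstrap every rate is 0.0, so the count/capacity ratio takes over;
// test_capacity_weighted_distribution below (6 vs 3 clusters for a 2:1
// capacity split) exercises exactly that path.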
fn compute_assignments( all_clusters: &[Cluster], - instances: &[InstanceRow], + rated_instances: &HashMap, current_map: &HashMap, - live_ids: &std::collections::HashSet, - rates: &HashMap, ) -> HashMap { let mut assignments: HashMap = HashMap::new(); - // Parse instance IDs once - let instance_ids: Vec = instances - .iter() - .map(|i| parse_uuid(&i.pk_instance)) - .collect(); + // Collect instance IDs + let instance_ids: Vec = rated_instances.keys().copied().collect(); // Track load per instance (weighted by rate / capacity) let mut instance_load: HashMap = instance_ids .iter() - .map(|id| (*id, *rates.get(id).unwrap_or(&0.0))) + .map(|id| (*id, rated_instances.get(id).map_or(0.0, |(_, rate)| *rate))) .collect(); // Track assignment count per instance for bootstrap (when all rates are 0) let mut instance_count: HashMap = instance_ids.iter().map(|id| (*id, 0)).collect(); - let all_rates_zero = rates.values().all(|r| *r == 0.0); + let all_rates_zero = rated_instances.values().all(|(_, rate)| *rate == 0.0); // Build instance capacity map - let capacity_map: HashMap = instances + let capacity_map: HashMap = rated_instances .iter() - .zip(instance_ids.iter()) - .map(|(i, id)| (*id, i.int_capacity as f64)) + .map(|(&id, (inst, _))| (id, inst.int_capacity as f64)) .collect(); // Build facility map for affinity filtering - let instance_facilities: HashMap> = instances + let instance_facilities: HashMap> = rated_instances .iter() - .zip(instance_ids.iter()) - .map(|(i, id)| (*id, i.str_facility.clone())) + .map(|(&id, (inst, _))| (id, inst.str_facility.clone())) .collect(); - // First pass: preserve stable assignments + // First pass: preserve stable assignments where the instance is still alive for cluster in all_clusters { - let cluster_json = serde_json::to_string(cluster).expect("Failed to serialize Cluster"); - - if let Some(¤t_instance) = current_map.get(&cluster_json) { - if live_ids.contains(¤t_instance) - && Self::is_facility_eligible(cluster, &instance_facilities, current_instance) - { - assignments.insert(cluster_json, current_instance); + if let Some(¤t_instance) = current_map.get(&cluster.id) { + if rated_instances.contains_key(¤t_instance) { + assignments.insert(cluster.id.clone(), current_instance); if let Some(count) = instance_count.get_mut(¤t_instance) { *count += 1; } @@ -247,9 +292,7 @@ impl Distributor { // Second pass: assign unassigned clusters for cluster in all_clusters { - let cluster_json = serde_json::to_string(cluster).expect("Failed to serialize Cluster"); - - if assignments.contains_key(&cluster_json) { + if assignments.contains_key(&cluster.id) { continue; // Already assigned in first pass } @@ -299,7 +342,7 @@ impl Distributor { .unwrap() }; - assignments.insert(cluster_json, best); + assignments.insert(cluster.id.clone(), best); if let Some(count) = instance_count.get_mut(&best) { *count += 1; } @@ -342,10 +385,8 @@ impl Distributor { ) -> Result<()> { // Upsert assignments that are new or changed for cluster in all_clusters { - let cluster_json = serde_json::to_string(cluster).expect("Failed to serialize Cluster"); - - if let Some(&new_instance) = new_assignments.get(&cluster_json) { - let needs_upsert = match current_map.get(&cluster_json) { + if let Some(&new_instance) = new_assignments.get(&cluster.id) { + let needs_upsert = match current_map.get(&cluster.id) { Some(¤t_instance) => current_instance != new_instance, None => true, }; @@ -359,16 +400,13 @@ impl Distributor { } } - // Delete assignments for clusters that no longer exist in the database - for 
cluster_json in current_map.keys() { - if !new_assignments.contains_key(cluster_json) { - // This cluster no longer exists — try to parse and delete - if let Ok(cluster) = serde_json::from_str::(cluster_json) { - dao.delete_assignment_by_cluster(&cluster) - .await - .into_diagnostic()?; - debug!("Removed stale assignment for cluster {}", cluster); - } + // Delete assignments for clusters that no longer exist + for cluster_id in current_map.keys() { + if !new_assignments.contains_key(cluster_id) { + dao.delete_assignment_by_cluster_id(cluster_id) + .await + .into_diagnostic()?; + debug!("Removed stale assignment for cluster_id {}", cluster_id); } } @@ -378,13 +416,12 @@ impl Distributor { #[cfg(test)] mod tests { - use std::collections::{HashMap, HashSet}; + use std::collections::HashMap; use uuid::Uuid; use crate::cluster::Cluster; use crate::cluster_key::{Tag, TagType}; - use crate::dao::helpers::parse_uuid; use super::{Distributor, InstanceRow}; @@ -400,12 +437,21 @@ mod tests { } fn make_instance(id: Uuid, facility: Option<&str>, capacity: i32) -> InstanceRow { + make_instance_with_jobs(id, facility, capacity, 0.0) + } + + fn make_instance_with_jobs( + id: Uuid, + facility: Option<&str>, + capacity: i32, + jobs_queried: f64, + ) -> InstanceRow { InstanceRow { pk_instance: id.to_string(), str_name: format!("test:{}", id), str_facility: facility.map(String::from), int_capacity: capacity, - float_jobs_queried: 0.0, + float_jobs_queried: jobs_queried, b_draining: false, } } @@ -421,28 +467,16 @@ mod tests { let inst_a = Uuid::new_v4(); let inst_b = Uuid::new_v4(); - let instances = vec![ - make_instance(inst_a, None, 100), - make_instance(inst_b, None, 100), - ]; + let rated_instances: HashMap = [ + (inst_a, (make_instance(inst_a, None, 100), 0.0)), + (inst_b, (make_instance(inst_b, None, 100), 0.0)), + ] + .into(); - let live_ids: HashSet = instances - .iter() - .map(|i| parse_uuid(&i.pk_instance)) - .collect(); - let rates: HashMap = instances - .iter() - .map(|i| (parse_uuid(&i.pk_instance), 0.0)) - .collect(); let current_map = HashMap::new(); - let assignments = Distributor::compute_assignments( - &clusters, - &instances, - ¤t_map, - &live_ids, - &rates, - ); + let assignments = + Distributor::compute_assignments(&clusters, &rated_instances, ¤t_map); // All clusters should be assigned assert_eq!(assignments.len(), 6); @@ -465,33 +499,21 @@ mod tests { let inst_a = Uuid::new_v4(); let inst_b = Uuid::new_v4(); - let instances = vec![ - make_instance(inst_a, None, 100), - make_instance(inst_b, None, 100), - ]; - - let live_ids: HashSet = instances - .iter() - .map(|i| parse_uuid(&i.pk_instance)) - .collect(); - let rates: HashMap = vec![(inst_a, 100.0), (inst_b, 50.0)].into_iter().collect(); + let rated_instances: HashMap = [ + (inst_a, (make_instance(inst_a, None, 100), 100.0)), + (inst_b, (make_instance(inst_b, None, 100), 50.0)), + ] + .into(); // Pre-assign all clusters to inst_a - let current_map: HashMap = clusters - .iter() - .map(|c| (serde_json::to_string(c).unwrap(), inst_a)) - .collect(); + let current_map: HashMap = + clusters.iter().map(|c| (c.id.clone(), inst_a)).collect(); - let assignments = Distributor::compute_assignments( - &clusters, - &instances, - ¤t_map, - &live_ids, - &rates, - ); + let assignments = + Distributor::compute_assignments(&clusters, &rated_instances, ¤t_map); // All clusters should stay with inst_a (stability) - for (_, instance) in &assignments { + for instance in assignments.values() { assert_eq!(*instance, inst_a); } } @@ -508,34 +530,31 @@ mod 
tests { let inst_a = Uuid::new_v4(); let inst_b = Uuid::new_v4(); - let instances = vec![ - make_instance(inst_a, Some(&facility_a.to_string()), 100), - make_instance(inst_b, Some(&facility_b.to_string()), 100), - ]; + let rated_instances: HashMap = [ + ( + inst_a, + ( + make_instance(inst_a, Some(&facility_a.to_string()), 100), + 0.0, + ), + ), + ( + inst_b, + ( + make_instance(inst_b, Some(&facility_b.to_string()), 100), + 0.0, + ), + ), + ] + .into(); - let live_ids: HashSet = instances - .iter() - .map(|i| parse_uuid(&i.pk_instance)) - .collect(); - let rates: HashMap = instances - .iter() - .map(|i| (parse_uuid(&i.pk_instance), 0.0)) - .collect(); let current_map = HashMap::new(); - let assignments = Distributor::compute_assignments( - &clusters, - &instances, - ¤t_map, - &live_ids, - &rates, - ); - - let cluster_a_json = serde_json::to_string(&cluster_a).unwrap(); - let cluster_b_json = serde_json::to_string(&cluster_b).unwrap(); + let assignments = + Distributor::compute_assignments(&clusters, &rated_instances, ¤t_map); - assert_eq!(assignments[&cluster_a_json], inst_a); - assert_eq!(assignments[&cluster_b_json], inst_b); + assert_eq!(assignments[&cluster_a.id], inst_a); + assert_eq!(assignments[&cluster_b.id], inst_b); } #[test] @@ -549,28 +568,16 @@ mod tests { let inst_a = Uuid::new_v4(); let inst_b = Uuid::new_v4(); - let instances = vec![ - make_instance(inst_a, None, 200), // 2x capacity - make_instance(inst_b, None, 100), - ]; + let rated_instances: HashMap = [ + (inst_a, (make_instance(inst_a, None, 200), 0.0)), // 2x capacity + (inst_b, (make_instance(inst_b, None, 100), 0.0)), + ] + .into(); - let live_ids: HashSet = instances - .iter() - .map(|i| parse_uuid(&i.pk_instance)) - .collect(); - let rates: HashMap = instances - .iter() - .map(|i| (parse_uuid(&i.pk_instance), 0.0)) - .collect(); let current_map = HashMap::new(); - let assignments = Distributor::compute_assignments( - &clusters, - &instances, - ¤t_map, - &live_ids, - &rates, - ); + let assignments = + Distributor::compute_assignments(&clusters, &rated_instances, ¤t_map); let count_a = assignments.values().filter(|&&v| v == inst_a).count(); let count_b = assignments.values().filter(|&&v| v == inst_b).count(); @@ -579,4 +586,321 @@ mod tests { assert_eq!(count_a, 6); assert_eq!(count_b, 3); } + + #[test] + fn test_compute_rates_bootstrap_returns_zero() { + let mut distributor = Distributor::new(); + let inst_a = Uuid::new_v4(); + let inst_b = Uuid::new_v4(); + + let instances: HashMap = [ + (inst_a, make_instance_with_jobs(inst_a, None, 100, 50.0)), + (inst_b, make_instance_with_jobs(inst_b, None, 100, 30.0)), + ] + .into(); + + let rated = distributor.compute_rates(instances); + + // Bootstrap: no previous snapshots, all rates should be 0.0 + assert_eq!(rated[&inst_a].1, 0.0); + assert_eq!(rated[&inst_b].1, 0.0); + } + + #[test] + fn test_compute_rates_positive_delta() { + let mut distributor = Distributor::new(); + let inst_a = Uuid::new_v4(); + let inst_b = Uuid::new_v4(); + + // Cycle 1: bootstrap — populates snapshots + let instances_t0: HashMap = [ + (inst_a, make_instance_with_jobs(inst_a, None, 100, 100.0)), + (inst_b, make_instance_with_jobs(inst_b, None, 100, 50.0)), + ] + .into(); + let _ = distributor.compute_rates(instances_t0); + + // Small delay to ensure non-zero elapsed time + std::thread::sleep(std::time::Duration::from_millis(10)); + + // Cycle 2: inst_a queried 200 more, inst_b queried 50 more + let instances_t1: HashMap = [ + (inst_a, make_instance_with_jobs(inst_a, None, 100, 300.0)), + 
(inst_b, make_instance_with_jobs(inst_b, None, 100, 100.0)), + ] + .into(); + let rated = distributor.compute_rates(instances_t1); + + assert!(rated[&inst_a].1 > 0.0); + assert!(rated[&inst_b].1 > 0.0); + // inst_a delta (200) is 4x inst_b delta (50) + assert!(rated[&inst_a].1 > rated[&inst_b].1); + } + + #[test] + fn test_compute_rates_negative_delta_returns_zero() { + let mut distributor = Distributor::new(); + let inst_a = Uuid::new_v4(); + + // Cycle 1: bootstrap with high value + let instances_t0: HashMap = + [(inst_a, make_instance_with_jobs(inst_a, None, 100, 500.0))].into(); + let _ = distributor.compute_rates(instances_t0); + + std::thread::sleep(std::time::Duration::from_millis(10)); + + // Cycle 2: counter decreased (e.g. reset) + let instances_t1: HashMap = + [(inst_a, make_instance_with_jobs(inst_a, None, 100, 100.0))].into(); + let rated = distributor.compute_rates(instances_t1); + + assert_eq!(rated[&inst_a].1, 0.0); + } + + #[test] + fn test_compute_rates_feeds_distribution() { + let mut distributor = Distributor::new(); + let facility = Uuid::new_v4(); + let show = Uuid::new_v4(); + + let clusters: Vec = (0..6) + .map(|i| make_cluster(facility, show, &format!("rate_tag{}", i))) + .collect(); + + let inst_a = Uuid::new_v4(); + let inst_b = Uuid::new_v4(); + + // Cycle 1: bootstrap + let instances_t0: HashMap = [ + (inst_a, make_instance_with_jobs(inst_a, None, 100, 0.0)), + (inst_b, make_instance_with_jobs(inst_b, None, 100, 0.0)), + ] + .into(); + let _ = distributor.compute_rates(instances_t0); + + std::thread::sleep(std::time::Duration::from_millis(10)); + + // Cycle 2: inst_a is much busier than inst_b + let instances_t1: HashMap = [ + (inst_a, make_instance_with_jobs(inst_a, None, 100, 1000.0)), + (inst_b, make_instance_with_jobs(inst_b, None, 100, 100.0)), + ] + .into(); + let rated = distributor.compute_rates(instances_t1); + + // Rates should reflect that inst_a is busier + assert!(rated[&inst_a].1 > rated[&inst_b].1); + + // Use computed rated instances in assignment (no prior assignments) + let current_map = HashMap::new(); + let assignments = Distributor::compute_assignments(&clusters, &rated, ¤t_map); + + assert_eq!(assignments.len(), 6); + + let count_a = assignments.values().filter(|&&v| v == inst_a).count(); + let count_b = assignments.values().filter(|&&v| v == inst_b).count(); + + // The less busy instance (inst_b) should get more clusters + assert!( + count_b > count_a, + "inst_b (less busy) should get more clusters: count_a={}, count_b={}", + count_a, + count_b + ); + } + + #[test] + fn test_ttl_expiration_enables_redistribution() { + use std::time::{Duration, Instant}; + + let facility = Uuid::new_v4(); + let show = Uuid::new_v4(); + + let clusters: Vec = (0..6) + .map(|i| make_cluster(facility, show, &format!("ttl_tag{}", i))) + .collect(); + + let inst_a = Uuid::new_v4(); + let inst_b = Uuid::new_v4(); + + // All clusters assigned to inst_a + let current_map: HashMap = + clusters.iter().map(|c| (c.id.clone(), inst_a)).collect(); + + let mut distributor = Distributor::new(); + + // Seed ages with a timestamp far in the past (expired) + let expired_time = Instant::now() - Duration::from_secs(300); + for cluster in &clusters { + distributor + .assignment_ages + .insert(cluster.id.clone(), expired_time); + } + + // Filter out expired assignments (simulating what distribute() does) + let assignment_ttl = Duration::from_secs(120); + let now = Instant::now(); + let active_assignments: HashMap = current_map + .iter() + .filter(|(cluster_id, _)| { + match 
distributor.assignment_ages.get(*cluster_id) { + Some(created_at) => now.duration_since(*created_at) < assignment_ttl, + None => true, + } + }) + .map(|(k, v)| (k.clone(), *v)) + .collect(); + + // All assignments should be expired + assert!( + active_assignments.is_empty(), + "All assignments should have expired" + ); + + // Now compute_assignments with both instances and no active assignments + let rated_instances: HashMap = [ + (inst_a, (make_instance(inst_a, None, 100), 0.0)), + (inst_b, (make_instance(inst_b, None, 100), 0.0)), + ] + .into(); + + let assignments = + Distributor::compute_assignments(&clusters, &rated_instances, &active_assignments); + + assert_eq!(assignments.len(), 6); + + // With no active assignments, clusters should be evenly distributed + let count_a = assignments.values().filter(|&&v| v == inst_a).count(); + let count_b = assignments.values().filter(|&&v| v == inst_b).count(); + assert_eq!(count_a, 3); + assert_eq!(count_b, 3); + } + + #[test] + fn test_seed_ages_gives_grace_period() { + use std::time::{Duration, Instant}; + + let facility = Uuid::new_v4(); + let show = Uuid::new_v4(); + + let clusters: Vec = (0..4) + .map(|i| make_cluster(facility, show, &format!("seed_tag{}", i))) + .collect(); + + let inst_a = Uuid::new_v4(); + + // Simulate existing DB assignments + let current_assignments: HashMap = + clusters.iter().map(|c| (c.id.clone(), inst_a)).collect(); + + let mut distributor = Distributor::new(); + distributor.seed_ages(¤t_assignments); + + // Verify all ages were seeded + assert_eq!(distributor.assignment_ages.len(), clusters.len()); + + // Verify none are expired (they should be very recent) + let assignment_ttl = Duration::from_secs(120); + let now = Instant::now(); + for (cluster_id, created_at) in &distributor.assignment_ages { + let age = now.duration_since(*created_at); + assert!( + age < assignment_ttl, + "Seeded assignment for {} should not be expired (age: {:?})", + cluster_id, + age + ); + } + + // Filtering should keep all assignments active + let active_assignments: HashMap = current_assignments + .iter() + .filter(|(cluster_id, _)| { + match distributor.assignment_ages.get(*cluster_id) { + Some(created_at) => now.duration_since(*created_at) < assignment_ttl, + None => true, + } + }) + .map(|(k, v)| (k.clone(), *v)) + .collect(); + + assert_eq!( + active_assignments.len(), + current_assignments.len(), + "All seeded assignments should remain active" + ); + } + + #[test] + fn test_assignment_ages_updated_after_reassignment() { + use std::time::{Duration, Instant}; + + let facility = Uuid::new_v4(); + let show = Uuid::new_v4(); + + let clusters: Vec = (0..4) + .map(|i| make_cluster(facility, show, &format!("age_tag{}", i))) + .collect(); + + let inst_a = Uuid::new_v4(); + let inst_b = Uuid::new_v4(); + + let mut distributor = Distributor::new(); + + // Simulate: 2 clusters assigned to inst_a, 2 expired (will go through second pass) + let expired_time = Instant::now() - Duration::from_secs(300); + let fresh_time = Instant::now(); + + // First 2 clusters are expired, last 2 are fresh + distributor + .assignment_ages + .insert(clusters[0].id.clone(), expired_time); + distributor + .assignment_ages + .insert(clusters[1].id.clone(), expired_time); + distributor + .assignment_ages + .insert(clusters[2].id.clone(), fresh_time); + distributor + .assignment_ages + .insert(clusters[3].id.clone(), fresh_time); + + // All currently assigned to inst_a + let current_map: HashMap = + clusters.iter().map(|c| (c.id.clone(), inst_a)).collect(); + + // Filter 
expired + let assignment_ttl = Duration::from_secs(120); + let now = Instant::now(); + let active_assignments: HashMap = current_map + .iter() + .filter(|(cluster_id, _)| { + match distributor.assignment_ages.get(*cluster_id) { + Some(created_at) => now.duration_since(*created_at) < assignment_ttl, + None => true, + } + }) + .map(|(k, v)| (k.clone(), *v)) + .collect(); + + // Only 2 fresh assignments should remain + assert_eq!(active_assignments.len(), 2); + + // Compute assignments with 2 instances + let rated_instances: HashMap = [ + (inst_a, (make_instance(inst_a, None, 100), 0.0)), + (inst_b, (make_instance(inst_b, None, 100), 0.0)), + ] + .into(); + + let assignments = + Distributor::compute_assignments(&clusters, &rated_instances, &active_assignments); + + // All 4 should be assigned + assert_eq!(assignments.len(), 4); + + // The 2 fresh clusters stay with inst_a, the 2 expired ones get redistributed + assert_eq!(assignments[&clusters[2].id], inst_a); + assert_eq!(assignments[&clusters[3].id], inst_a); + } } diff --git a/rust/crates/scheduler/src/orchestrator/leader.rs b/rust/crates/scheduler/src/orchestrator/leader.rs index b7f4af41d..5f96df256 100644 --- a/rust/crates/scheduler/src/orchestrator/leader.rs +++ b/rust/crates/scheduler/src/orchestrator/leader.rs @@ -130,6 +130,12 @@ impl LeaderElection { crate::metrics::set_orchestrator_is_leader(true); // Reset distributor state for fresh snapshots distributor = Distributor::new(); + // Seed assignment ages from existing DB assignments so they + // get a full TTL grace period before redistribution + match dao.get_all_assignments().await { + Ok(assignments) => distributor.seed_ages(&assignments), + Err(e) => warn!("Failed to seed assignment ages: {}", e), + } } Ok(false) => { // Another instance holds the lock diff --git a/rust/crates/scheduler/src/orchestrator/sync.rs b/rust/crates/scheduler/src/orchestrator/sync.rs index acfacc77b..0f2b33414 100644 --- a/rust/crates/scheduler/src/orchestrator/sync.rs +++ b/rust/crates/scheduler/src/orchestrator/sync.rs @@ -63,12 +63,12 @@ impl ClusterSync { let clusters: Vec = assignments .into_iter() .filter_map(|row| { - match serde_json::from_str::(&row.str_cluster) { + match serde_json::from_str::(&row.str_cluster_json) { Ok(cluster) => Some(cluster), Err(e) => { error!( "Failed to deserialize cluster assignment: {}. JSON: {}", - e, row.str_cluster + e, row.str_cluster_json ); None } diff --git a/rust/crates/scheduler/tests/util.rs b/rust/crates/scheduler/tests/util.rs index c6c425053..d5a466891 100644 --- a/rust/crates/scheduler/tests/util.rs +++ b/rust/crates/scheduler/tests/util.rs @@ -590,7 +590,7 @@ pub async fn create_test_data( // Clusters. 
Chunk manual tags in approximately 4 groups
     for chunk in tags.chunks(tags.len() / 4) {
-        let cluster = Cluster::multiple_tag(
+        let cluster = Cluster::from_tags(
             facility_id,
             show_id,
             chunk

From 93e0b3d0796b22d0d68cd5e5013a61506305ceb2 Mon Sep 17 00:00:00 2001
From: Diego Tavares
Date: Thu, 5 Mar 2026 20:35:34 -0800
Subject: [PATCH 03/16] Refactor distributor

---
 .../scheduler/src/orchestrator/distributor.rs | 152 +++++++++---------
 1 file changed, 73 insertions(+), 79 deletions(-)

diff --git a/rust/crates/scheduler/src/orchestrator/distributor.rs b/rust/crates/scheduler/src/orchestrator/distributor.rs
index 1257a7a80..58c6bcb34 100644
--- a/rust/crates/scheduler/src/orchestrator/distributor.rs
+++ b/rust/crates/scheduler/src/orchestrator/distributor.rs
@@ -77,23 +77,25 @@ impl Distributor {
         let assignment_ttl = CONFIG.orchestrator.assignment_ttl;
         current_assignments
             .iter()
-            .filter(|(cluster_id, _)| match self.assignment_ages.get(*cluster_id) {
-                Some(created_at) => {
-                    let age = now.duration_since(*created_at);
-                    if age >= assignment_ttl {
-                        debug!(
-                            "Assignment for cluster {} expired (age: {:.1}s, ttl: {:.1}s)",
-                            cluster_id,
-                            age.as_secs_f64(),
-                            assignment_ttl.as_secs_f64()
-                        );
-                        false
-                    } else {
-                        true
+            .filter(
+                |(cluster_id, _)| match self.assignment_ages.get(*cluster_id) {
+                    Some(created_at) => {
+                        let age = now.duration_since(*created_at);
+                        if age >= assignment_ttl {
+                            debug!(
+                                "Assignment for cluster {} expired (age: {:.1}s, ttl: {:.1}s)",
+                                cluster_id,
+                                age.as_secs_f64(),
+                                assignment_ttl.as_secs_f64()
+                            );
+                            false
+                        } else {
+                            true
+                        }
                     }
-                }
-                None => true,
-            })
+                    None => true,
+                },
+            )
             .map(|(k, v)| (k.clone(), *v))
             .collect()
     }
@@ -152,7 +154,7 @@ impl Distributor {
             return Ok(());
         }
 
-        // Read current assignments (cluster_id -> instance_id)
+        // Read current assignments, expired included (cluster_id -> instance_id)
         let current_assignments = dao.get_all_assignments().await.into_diagnostic()?;
 
         // Compute job query rates per instance (enriches instances with their rate)
@@ -247,7 +249,7 @@ impl Distributor {
     fn compute_assignments(
         all_clusters: &[Cluster],
         rated_instances: &HashMap<Uuid, (InstanceRow, f64)>,
-        current_map: &HashMap<String, Uuid>,
+        active_assignments: &HashMap<String, Uuid>,
     ) -> HashMap<String, Uuid> {
         let mut assignments: HashMap<String, Uuid> = HashMap::new();
@@ -255,23 +257,16 @@
 
         // Collect instance IDs
         let instance_ids: Vec<Uuid> = rated_instances.keys().copied().collect();
 
         // Track load per instance (weighted by rate / capacity)
-        let mut instance_load: HashMap<Uuid, f64> = instance_ids
+        let mut instance_load_count_capacity: HashMap<Uuid, (f64, usize, f64)> = instance_ids
             .iter()
-            .map(|id| (*id, rated_instances.get(id).map_or(0.0, |(_, rate)| *rate)))
+            .map(|id| {
+                let (inst, rate) = rated_instances.get(id).unwrap();
+                (*id, (*rate, 0, inst.int_capacity as f64))
+            })
             .collect();
 
-        // Track assignment count per instance for bootstrap (when all rates are 0)
-        let mut instance_count: HashMap<Uuid, usize> =
-            instance_ids.iter().map(|id| (*id, 0)).collect();
         let all_rates_zero = rated_instances.values().all(|(_, rate)| *rate == 0.0);
 
-        // Build instance capacity map
-        let capacity_map: HashMap<Uuid, f64> = rated_instances
-            .iter()
-            .map(|(&id, (inst, _))| (id, inst.int_capacity as f64))
-            .collect();
-
         // Build facility map for affinity filtering
         let instance_facilities: HashMap<Uuid, Option<String>> = rated_instances
             .iter()
             .map(|(&id, (inst, _))| (id, inst.str_facility.clone()))
             .collect();
 
         // First pass: preserve stable assignments where the instance is still alive
         for cluster in all_clusters {
             if let Some(&current_instance) = active_assignments.get(&cluster.id) {
                 if rated_instances.contains_key(&current_instance) {
                     assignments.insert(cluster.id.clone(), current_instance);
-                    if let Some(count) = instance_count.get_mut(&current_instance) {
+                    if let Some((_rate, count, _capacity)) =
+                        instance_load_count_capacity.get_mut(&current_instance)
+                    {
                         *count += 1;
                     }
@@ -311,44 +308,41 @@
                 continue;
             }
 
-            // Pick the instance with the lowest load
-            let best = if all_rates_zero {
-                // Bootstrap: distribute by count / capacity
-                *eligible
-                    .iter()
-                    .min_by(|a, b| {
-                        let ratio_a = *instance_count.get(a).unwrap_or(&0) as f64
-                            / capacity_map.get(a).unwrap_or(&1.0);
-                        let ratio_b = *instance_count.get(b).unwrap_or(&0) as f64
-                            / capacity_map.get(b).unwrap_or(&1.0);
-                        ratio_a
-                            .partial_cmp(&ratio_b)
-                            .unwrap_or(std::cmp::Ordering::Equal)
-                    })
-                    .unwrap()
-            } else {
-                // Rate-based: pick instance with lowest rate/capacity ratio
-                *eligible
-                    .iter()
-                    .min_by(|a, b| {
-                        let ratio_a = instance_load.get(a).unwrap_or(&0.0)
-                            / capacity_map.get(a).unwrap_or(&1.0);
-                        let ratio_b = instance_load.get(b).unwrap_or(&0.0)
-                            / capacity_map.get(b).unwrap_or(&1.0);
-                        ratio_a
-                            .partial_cmp(&ratio_b)
-                            .unwrap_or(std::cmp::Ordering::Equal)
-                    })
-                    .unwrap()
-            };
+            let best = *eligible
+                .iter()
+                .min_by(|a, b| {
+                    let (load_a, count_a, capacity_a) = *instance_load_count_capacity
+                        .get(a)
+                        .unwrap_or(&(0.0, 0, 1.0));
+                    let (load_b, count_b, capacity_b) = *instance_load_count_capacity
+                        .get(b)
+                        .unwrap_or(&(0.0, 0, 1.0));
+
+                    // If all rates are 0, use count-based comparison
+                    let (ratio_a, ratio_b) = if all_rates_zero {
+                        (count_a as f64 / capacity_a, count_b as f64 / capacity_b)
+                    } else {
+                        (load_a / capacity_a, load_b / capacity_b)
+                    };
+                    ratio_a
+                        .partial_cmp(&ratio_b)
+                        .unwrap_or(std::cmp::Ordering::Equal)
+                })
+                .unwrap();
 
             assignments.insert(cluster.id.clone(), best);
-            if let Some(count) = instance_count.get_mut(&best) {
+            if let Some((load, count, _)) = instance_load_count_capacity.get_mut(&best) {
                 *count += 1;
-            }
-            // For rate-based distribution, slightly increase load estimate for
-            // subsequent assignments within the same cycle
-            if let Some(load) = instance_load.get_mut(&best) {
+
+                // For rate-based distribution, slightly increase load estimate for
+                // subsequent assignments within the same cycle.
+                // Without this bump, if instance B has the lowest rate, it would win *every*
+                // assignment in the loop, piling all unassigned clusters onto one instance.
+                // By adding `1.0` to load after each assignment, the loop simulates the expected
+                // increase in workload, so subsequent iterations see instance B as slightly
+                // busier and may pick a different instance instead.
+                // 1.0 is an arbitrary unit, but it's enough to spread clusters across instances
+                // rather than dumping them all on the least-loaded one.
*load += 1.0; } } @@ -742,12 +736,12 @@ mod tests { let now = Instant::now(); let active_assignments: HashMap = current_map .iter() - .filter(|(cluster_id, _)| { - match distributor.assignment_ages.get(*cluster_id) { + .filter( + |(cluster_id, _)| match distributor.assignment_ages.get(*cluster_id) { Some(created_at) => now.duration_since(*created_at) < assignment_ttl, None => true, - } - }) + }, + ) .map(|(k, v)| (k.clone(), *v)) .collect(); @@ -815,12 +809,12 @@ mod tests { // Filtering should keep all assignments active let active_assignments: HashMap = current_assignments .iter() - .filter(|(cluster_id, _)| { - match distributor.assignment_ages.get(*cluster_id) { + .filter( + |(cluster_id, _)| match distributor.assignment_ages.get(*cluster_id) { Some(created_at) => now.duration_since(*created_at) < assignment_ttl, None => true, - } - }) + }, + ) .map(|(k, v)| (k.clone(), *v)) .collect(); @@ -874,12 +868,12 @@ mod tests { let now = Instant::now(); let active_assignments: HashMap = current_map .iter() - .filter(|(cluster_id, _)| { - match distributor.assignment_ages.get(*cluster_id) { + .filter( + |(cluster_id, _)| match distributor.assignment_ages.get(*cluster_id) { Some(created_at) => now.duration_since(*created_at) < assignment_ttl, None => true, - } - }) + }, + ) .map(|(k, v)| (k.clone(), *v)) .collect(); From 7a0e76bd888f385a92dfe516c441b8309429f8d7 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 8 Apr 2026 10:53:18 -0700 Subject: [PATCH 04/16] Refactor orchestration methods --- rust/crates/scheduler/Cargo.toml | 1 + rust/crates/scheduler/src/metrics/mod.rs | 13 ++- .../scheduler/src/orchestrator/distributor.rs | 17 +++- .../scheduler/src/orchestrator/leader.rs | 41 +++++---- rust/crates/scheduler/src/orchestrator/mod.rs | 5 ++ .../crates/scheduler/src/orchestrator/sync.rs | 83 ++++++++++--------- 6 files changed, 94 insertions(+), 66 deletions(-) diff --git a/rust/crates/scheduler/Cargo.toml b/rust/crates/scheduler/Cargo.toml index c669245b8..13b6d1327 100644 --- a/rust/crates/scheduler/Cargo.toml +++ b/rust/crates/scheduler/Cargo.toml @@ -58,6 +58,7 @@ axum = "0.7" tower-http = { version = "0.5", features = ["trace"] } urlencoding = "2.1" gethostname = "0.4" +rand = "0.8" [features] default = [] diff --git a/rust/crates/scheduler/src/metrics/mod.rs b/rust/crates/scheduler/src/metrics/mod.rs index 43daa0925..66f69bf55 100644 --- a/rust/crates/scheduler/src/metrics/mod.rs +++ b/rust/crates/scheduler/src/metrics/mod.rs @@ -16,9 +16,13 @@ use prometheus::{ register_counter, register_counter_vec, register_histogram, register_int_gauge, Counter, CounterVec, Encoder, Histogram, IntGauge, TextEncoder, }; +use std::sync::atomic::{AtomicBool, Ordering}; use std::time::Duration; use tracing::{error, info}; +/// Whether the scheduler is running in orchestrated mode. +pub static ORCHESTRATOR_ENABLED: AtomicBool = AtomicBool::new(false); + lazy_static! 
{ // Job metrics from entrypoint.rs pub static ref JOBS_QUERIED_TOTAL: Counter = register_counter!( @@ -134,14 +138,15 @@ async fn metrics_handler() -> impl IntoResponse { /// This function runs indefinitely and only returns if the server fails to start /// Handler for the /health endpoint async fn health_handler() -> impl IntoResponse { - let assigned = ORCHESTRATOR_ASSIGNED_CLUSTERS.get(); - if assigned > 0 { - (axum::http::StatusCode::OK, "ok") - } else { + if ORCHESTRATOR_ENABLED.load(Ordering::Relaxed) + && ORCHESTRATOR_ASSIGNED_CLUSTERS.get() == 0 + { ( axum::http::StatusCode::SERVICE_UNAVAILABLE, "no clusters assigned", ) + } else { + (axum::http::StatusCode::OK, "ok") } } diff --git a/rust/crates/scheduler/src/orchestrator/distributor.rs b/rust/crates/scheduler/src/orchestrator/distributor.rs index 58c6bcb34..730d02fe7 100644 --- a/rust/crates/scheduler/src/orchestrator/distributor.rs +++ b/rust/crates/scheduler/src/orchestrator/distributor.rs @@ -13,6 +13,8 @@ use std::collections::HashMap; use std::time::{Duration, Instant}; +use rand::Rng; + use miette::{IntoDiagnostic, Result}; use tracing::{debug, info, warn}; use uuid::Uuid; @@ -53,14 +55,20 @@ impl Distributor { /// Seeds assignment ages from existing database assignments. /// - /// Called on leader promotion to give existing assignments a full TTL grace period - /// before they become eligible for redistribution. This prevents a thundering-herd - /// redistribution when a new leader takes over. + /// Called on leader promotion to give existing assignments a jittered age + /// spread across the TTL window. This prevents a thundering-herd redistribution + /// when a new leader takes over by staggering expiration times. pub fn seed_ages(&mut self, current_assignments: &HashMap) { let now = Instant::now(); + let ttl = CONFIG.orchestrator.assignment_ttl; + let mut rng = rand::thread_rng(); self.assignment_ages = current_assignments .keys() - .map(|cluster_id| (cluster_id.clone(), now)) + .map(|cluster_id| { + let random_age = + Duration::from_secs_f64(rng.gen_range(0.0..ttl.as_secs_f64())); + (cluster_id.clone(), now - random_age) + }) .collect(); } @@ -93,6 +101,7 @@ impl Distributor { true } } + // Assignments with no birth date live forever None => true, }, ) diff --git a/rust/crates/scheduler/src/orchestrator/leader.rs b/rust/crates/scheduler/src/orchestrator/leader.rs index 5f96df256..793eb0d88 100644 --- a/rust/crates/scheduler/src/orchestrator/leader.rs +++ b/rust/crates/scheduler/src/orchestrator/leader.rs @@ -88,15 +88,19 @@ impl LeaderElection { tokio::spawn(async move { let mut distributor = Distributor::new(); + let mut distribution_ticker = tokio::time::interval(distribution_interval); + let mut election_ticker = tokio::time::interval(election_interval); loop { - // Check for shutdown - if *shutdown.borrow() { - info!("Leader election loop shutting down"); - break; - } - if is_leader.load(Ordering::Relaxed) { + tokio::select! 
{ + _ = distribution_ticker.tick() => {} + _ = shutdown.changed() => { + info!("Leader loop received shutdown signal"); + break; + } + } + // We are the leader — run one distribution cycle match distributor .distribute(&dao, &ignore_tags, failure_threshold) @@ -110,32 +114,36 @@ impl LeaderElection { is_leader.store(false, Ordering::Relaxed); warn!("Demoted from leader due to distribution failure"); crate::metrics::set_orchestrator_is_leader(false); + // Reset election ticker so we wait a full interval before retrying + election_ticker.reset(); } } - - // Wait for next distribution cycle or shutdown + } else { tokio::select! { - _ = tokio::time::sleep(distribution_interval) => {} + _ = election_ticker.tick() => {} _ = shutdown.changed() => { - info!("Leader loop received shutdown signal"); + info!("Election loop received shutdown signal"); break; } } - } else { + // Not leader — try to acquire the lock match dao.try_acquire_leader_lock(ORCHESTRATOR_LOCK_ID).await { Ok(true) => { + // Became the leader info!("Acquired leader lock — this instance is now the leader"); is_leader.store(true, Ordering::Relaxed); crate::metrics::set_orchestrator_is_leader(true); // Reset distributor state for fresh snapshots distributor = Distributor::new(); // Seed assignment ages from existing DB assignments so they - // get a full TTL grace period before redistribution + // get a grace period before redistribution match dao.get_all_assignments().await { Ok(assignments) => distributor.seed_ages(&assignments), Err(e) => warn!("Failed to seed assignment ages: {}", e), } + // Reset distribution ticker so first cycle runs after a full interval + distribution_ticker.reset(); } Ok(false) => { // Another instance holds the lock @@ -144,15 +152,6 @@ impl LeaderElection { warn!("Failed to attempt leader lock acquisition: {}", e); } } - - // Wait before retrying election or shutdown - tokio::select! { - _ = tokio::time::sleep(election_interval) => {} - _ = shutdown.changed() => { - info!("Election loop received shutdown signal"); - break; - } - } } } diff --git a/rust/crates/scheduler/src/orchestrator/mod.rs b/rust/crates/scheduler/src/orchestrator/mod.rs index d57f74e0a..5b7e8dbd0 100644 --- a/rust/crates/scheduler/src/orchestrator/mod.rs +++ b/rust/crates/scheduler/src/orchestrator/mod.rs @@ -20,8 +20,11 @@ use miette::Result; use tokio::sync::watch; use tracing::info; +use std::sync::atomic::Ordering; + use crate::cluster::ClusterFeed; use crate::config::CONFIG; +use crate::metrics::ORCHESTRATOR_ENABLED; use instance::InstanceManager; use leader::LeaderElection; @@ -43,6 +46,8 @@ use sync::ClusterSync; /// * `Ok(())` - Scheduler completed successfully /// * `Err(miette::Error)` - Fatal error during setup or pipeline execution pub async fn run(facility: Option, ignore_tags: Vec) -> Result<()> { + ORCHESTRATOR_ENABLED.store(true, Ordering::Relaxed); + // Shutdown signal: send `true` to stop all loops let (shutdown_tx, shutdown_rx) = watch::channel(false); diff --git a/rust/crates/scheduler/src/orchestrator/sync.rs b/rust/crates/scheduler/src/orchestrator/sync.rs index 0f2b33414..4ce83cfab 100644 --- a/rust/crates/scheduler/src/orchestrator/sync.rs +++ b/rust/crates/scheduler/src/orchestrator/sync.rs @@ -20,7 +20,7 @@ use uuid::Uuid; use crate::cluster::{Cluster, ClusterFeed}; use crate::config::CONFIG; -use super::dao::OrchestratorDao; +use super::dao::{ClusterAssignmentRow, OrchestratorDao}; /// Worker-side cluster synchronization. /// @@ -58,42 +58,7 @@ impl ClusterSync { loop { tokio::select! 
{ _ = ticker.tick() => { - match dao.get_assignments_for_instance(instance_id).await { - Ok(assignments) => { - let clusters: Vec<Cluster> = assignments - .into_iter() - .filter_map(|row| { - match serde_json::from_str::<Cluster>(&row.str_cluster_json) { - Ok(cluster) => Some(cluster), - Err(e) => { - error!( - "Failed to deserialize cluster assignment: {}. JSON: {}", - e, row.str_cluster_json - ); - None - } - } - }) - .collect(); - - let count = clusters.len(); - cluster_feed.update_clusters(clusters); - crate::metrics::set_orchestrator_assigned_clusters(count); - - debug!( - instance_id = %instance_id, - "Synced {} cluster assignment(s)", - count - ); - } - Err(e) => { - error!( - instance_id = %instance_id, - "Failed to poll cluster assignments: {}", - e - ); - } - } + sync_assignments(instance_id, &dao, &cluster_feed).await; } _ = shutdown.changed() => { info!(instance_id = %instance_id, "Cluster sync loop shutting down"); @@ -104,3 +69,47 @@ impl ClusterSync { }) } } + +async fn sync_assignments( + instance_id: Uuid, + dao: &OrchestratorDao, + cluster_feed: &ClusterFeed, +) { + let assignments = match dao.get_assignments_for_instance(instance_id).await { + Ok(a) => a, + Err(e) => { + error!( + instance_id = %instance_id, + "Failed to poll cluster assignments: {}", e + ); + return; + } + }; + + let clusters: Vec<Cluster> = assignments + .into_iter() + .filter_map(|row| parse_cluster(row)) + .collect(); + + let count = clusters.len(); + cluster_feed.update_clusters(clusters); + crate::metrics::set_orchestrator_assigned_clusters(count); + + debug!( + instance_id = %instance_id, + "Synced {} cluster assignment(s)", count + ); +} + +fn parse_cluster(row: ClusterAssignmentRow) -> Option<Cluster> { + match serde_json::from_str::<Cluster>(&row.str_cluster_json) { + Ok(cluster) => Some(cluster), + Err(e) => { + error!( + "Failed to deserialize cluster assignment: {}. JSON: {}", + e, row.str_cluster_json + ); + None + } + } +} From 7999a8df9026b91176464e25f3d8e6c21c7e0134 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 8 Apr 2026 10:55:41 -0700 Subject: [PATCH 05/16] Avoid returning Success when no rows are updated on heartbeat Don't treat UPDATE 0 as a successful heartbeat. UPDATE ... WHERE pk_instance = $1 can legitimately affect 0 rows if delete_dead_instances() wins a transient race. Returning Ok(()) here leaves a healthy scheduler running but permanently absent from orchestrator membership until restart because the caller never learns it must re-register.
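The caller side is expected to react to the new error by re-registering rather than treating it as fatal. A minimal sketch of that heartbeat step, assuming the patched DAO method is named update_heartbeat and that register_instance takes roughly these arguments (beat_once, hostname, and capacity are illustrative; only the RowNotFound contract comes from this patch):

    // Hypothetical caller-side heartbeat handling. `update_heartbeat` is the
    // method patched above; the `register_instance` arguments and return type
    // are assumptions for illustration.
    async fn beat_once(
        dao: &OrchestratorDao,
        instance_id: Uuid,
        jobs_queried: f64,
        hostname: &str,
        capacity: i32,
    ) -> Result<(), sqlx::Error> {
        match dao.update_heartbeat(instance_id, jobs_queried).await {
            Ok(()) => Ok(()),
            Err(sqlx::Error::RowNotFound) => {
                // Our row was reaped by delete_dead_instances(); re-register
                // so this instance rejoins orchestrator membership.
                tracing::warn!("instance row missing on heartbeat; re-registering");
                dao.register_instance(instance_id, hostname, capacity).await
            }
            Err(e) => Err(e),
        }
    }

Surfacing the race as sqlx::Error::RowNotFound keeps the DAO signature unchanged while still letting the heartbeat loop tell a reaped row apart from a transport failure.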
--- rust/crates/scheduler/src/orchestrator/dao.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rust/crates/scheduler/src/orchestrator/dao.rs b/rust/crates/scheduler/src/orchestrator/dao.rs index 43b765103..7d71c9008 100644 --- a/rust/crates/scheduler/src/orchestrator/dao.rs +++ b/rust/crates/scheduler/src/orchestrator/dao.rs @@ -149,11 +149,14 @@ impl OrchestratorDao { instance_id: Uuid, jobs_queried: f64, ) -> Result<(), sqlx::Error> { - sqlx::query(UPDATE_HEARTBEAT) + let result = sqlx::query(UPDATE_HEARTBEAT) .bind(instance_id.to_string()) .bind(jobs_queried) .execute(&*self.connection_pool) .await?; + if result.rows_affected() == 0 { + return Err(sqlx::Error::RowNotFound); + } Ok(()) } From 4250198d1a900ce9b9899f74afa5491c15591fab Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 8 Apr 2026 11:24:58 -0700 Subject: [PATCH 06/16] Add long lived connection for leader election --- rust/crates/scheduler/src/orchestrator/dao.rs | 72 +++++++++++++++++-- .../scheduler/src/orchestrator/leader.rs | 16 ++++- 2 files changed, 78 insertions(+), 10 deletions(-) diff --git a/rust/crates/scheduler/src/orchestrator/dao.rs b/rust/crates/scheduler/src/orchestrator/dao.rs index 7d71c9008..49fbae5b6 100644 --- a/rust/crates/scheduler/src/orchestrator/dao.rs +++ b/rust/crates/scheduler/src/orchestrator/dao.rs @@ -15,17 +15,22 @@ use std::sync::Arc; use std::time::Duration; use miette::{IntoDiagnostic, Result}; -use sqlx::{Pool, Postgres}; +use sqlx::postgres::{PgConnectOptions, PgConnection}; +use sqlx::{ConnectOptions, Connection, Pool, Postgres}; +use tokio::sync::Mutex; use uuid::Uuid; -use crate::dao::helpers::parse_uuid; - use crate::cluster::Cluster; +use crate::config::CONFIG; +use crate::dao::helpers::parse_uuid; use crate::pgpool::connection_pool; /// Data Access Object for orchestrator tables (scheduler_instance and scheduler_cluster_assignment). pub struct OrchestratorDao { connection_pool: Arc>, + /// Dedicated connection for holding the session-level advisory lock. + /// Lives outside the pool so the lock is retained as long as this connection is open. + leader_conn: Mutex>, } #[derive(sqlx::FromRow, Debug, Clone)] @@ -122,9 +127,19 @@ impl OrchestratorDao { let pool = connection_pool().await.into_diagnostic()?; Ok(OrchestratorDao { connection_pool: pool, + leader_conn: Mutex::new(None), }) } + async fn open_dedicated_connection() -> Result { + let options: PgConnectOptions = CONFIG + .database + .connection_url() + .parse::()? + .application_name("opencue-leader-lock"); + options.connect().await + } + // --- Instance operations --- pub async fn register_instance( @@ -263,13 +278,56 @@ impl OrchestratorDao { // --- Leader election --- - /// Attempts to acquire the advisory lock. Returns true if acquired. - /// Uses a dedicated connection (not from the pool) to hold the session-level lock. + /// Attempts to acquire the advisory lock using a dedicated connection outside the pool. + /// + /// If the lock is already held (dedicated connection exists), verifies liveness via ping. + /// If the connection died, drops it and attempts re-acquisition on a fresh connection. + /// If the lock is acquired, the dedicated connection is kept alive to hold the lock. pub async fn try_acquire_leader_lock(&self, lock_id: i64) -> Result { + let mut guard = self.leader_conn.lock().await; + + // If we already have a connection, the lock is already held — verify liveness. 
+ if let Some(ref mut conn) = *guard { + match conn.ping().await { + Ok(()) => return Ok(true), + Err(_) => { + // Connection died — lock is lost. Fall through to re-acquire. + *guard = None; + } + } + } + + // Open a new dedicated connection and attempt to acquire the lock. + let mut conn = Self::open_dedicated_connection().await?; let row: (bool,) = sqlx::query_as(TRY_ADVISORY_LOCK) .bind(lock_id) - .fetch_one(&*self.connection_pool) + .fetch_one(&mut conn) .await?; - Ok(row.0) + + if row.0 { + *guard = Some(conn); + Ok(true) + } else { + // Another leader holds it — drop this connection. + Ok(false) + } + } + + /// Releases the advisory lock by closing the dedicated connection. + /// If no lock is held, this is a no-op. + pub async fn release_leader_lock(&self) { + let mut guard = self.leader_conn.lock().await; + if let Some(conn) = guard.take() { + let _ = conn.close().await; + } + } + + /// Returns true if the dedicated leader connection is alive and the lock is presumably held. + pub async fn is_leader_lock_held(&self) -> bool { + let mut guard = self.leader_conn.lock().await; + match guard.as_mut() { + Some(conn) => conn.ping().await.is_ok(), + None => false, + } } } diff --git a/rust/crates/scheduler/src/orchestrator/leader.rs b/rust/crates/scheduler/src/orchestrator/leader.rs index 793eb0d88..041dd7592 100644 --- a/rust/crates/scheduler/src/orchestrator/leader.rs +++ b/rust/crates/scheduler/src/orchestrator/leader.rs @@ -101,6 +101,15 @@ impl LeaderElection { } } + // Verify the lock is still held (dedicated connection alive) + if !dao.is_leader_lock_held().await { + warn!("Leader lock connection lost — demoting"); + is_leader.store(false, Ordering::Relaxed); + crate::metrics::set_orchestrator_is_leader(false); + election_ticker.reset(); + continue; + } + // We are the leader — run one distribution cycle match distributor .distribute(&dao, &ignore_tags, failure_threshold) @@ -109,8 +118,8 @@ impl LeaderElection { Ok(()) => {} Err(e) => { error!("Distribution cycle failed: {}", e); - // If distribution fails, it might be a DB issue. - // Demote and re-enter election after interval. + // Release the lock so another instance can take over quickly. 
+ dao.release_leader_lock().await; is_leader.store(false, Ordering::Relaxed); warn!("Demoted from leader due to distribution failure"); crate::metrics::set_orchestrator_is_leader(false); @@ -155,7 +164,8 @@ impl LeaderElection { } } - // On shutdown, demote + // On shutdown, release the lock and demote + dao.release_leader_lock().await; is_leader.store(false, Ordering::Relaxed); crate::metrics::set_orchestrator_is_leader(false); }) From f5d60577aacec0275b18acc220c05312e6da0613 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Wed, 8 Apr 2026 11:28:43 -0700 Subject: [PATCH 07/16] Minor fix on smoke test --- rust/crates/scheduler/tests/smoke_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/crates/scheduler/tests/smoke_tests.rs b/rust/crates/scheduler/tests/smoke_tests.rs index 7b9d4e845..62b1324ea 100644 --- a/rust/crates/scheduler/tests/smoke_tests.rs +++ b/rust/crates/scheduler/tests/smoke_tests.rs @@ -934,7 +934,7 @@ mod scheduler_smoke_test { async fn test_dispatch_manual_tag_flow_inner(test_data: TestData) { // Create a cluster feed with MANUAL tags (chunked) - let manual_cluster = Cluster::multiple_tag( + let manual_cluster = Cluster::from_tags( test_data.facility_id, test_data.show_id, vec![Tag { From 73339c8b157f790bb09871e4d42664a78c9bee90 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Thu, 9 Apr 2026 14:56:00 -0700 Subject: [PATCH 08/16] Add integration tests with embedded db Use pg-embed to stand up a scheduler against an embedded Postgres and test the orchestration logic --- ...39__Add_scheduler_orchestrator_tables.sql} | 0 ...sql => V40__Add_unfinished_jobs_index.sql} | 0 rust/crates/scheduler/Cargo.toml | 4 +- rust/crates/scheduler/resources/migrations | 1 + rust/crates/scheduler/resources/schema | 4307 ----------------- rust/crates/scheduler/resources/seed_data.sql | 1 + rust/crates/scheduler/src/cluster.rs | 21 + rust/crates/scheduler/src/dao/cluster_dao.rs | 6 + rust/crates/scheduler/src/dao/host_dao.rs | 6 + rust/crates/scheduler/src/dao/job_dao.rs | 6 + rust/crates/scheduler/src/dao/layer_dao.rs | 6 + rust/crates/scheduler/src/dao/proc_dao.rs | 6 + .../src/dao/resource_accounting_dao.rs | 6 + rust/crates/scheduler/src/lib.rs | 1 + rust/crates/scheduler/src/orchestrator/dao.rs | 54 +- .../scheduler/src/orchestrator/distributor.rs | 2 +- .../scheduler/src/orchestrator/instance.rs | 27 +- rust/crates/scheduler/src/orchestrator/mod.rs | 8 +- rust/crates/scheduler/tests/embedded_db.rs | 178 + .../scheduler/tests/integration_tests.rs | 263 + 20 files changed, 567 insertions(+), 4336 deletions(-) rename cuebot/src/main/resources/conf/ddl/postgres/migrations/{V38__Add_scheduler_orchestrator_tables.sql => V39__Add_scheduler_orchestrator_tables.sql} (100%) rename cuebot/src/main/resources/conf/ddl/postgres/migrations/{V39__Add_unfinished_jobs_index.sql => V40__Add_unfinished_jobs_index.sql} (100%) create mode 120000 rust/crates/scheduler/resources/migrations delete mode 100644 rust/crates/scheduler/resources/schema create mode 120000 rust/crates/scheduler/resources/seed_data.sql create mode 100644 rust/crates/scheduler/tests/embedded_db.rs create mode 100644 rust/crates/scheduler/tests/integration_tests.rs diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V38__Add_scheduler_orchestrator_tables.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_scheduler_orchestrator_tables.sql similarity index 100% rename from cuebot/src/main/resources/conf/ddl/postgres/migrations/V38__Add_scheduler_orchestrator_tables.sql rename to
cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_scheduler_orchestrator_tables.sql diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_unfinished_jobs_index.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V40__Add_unfinished_jobs_index.sql similarity index 100% rename from cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_unfinished_jobs_index.sql rename to cuebot/src/main/resources/conf/ddl/postgres/migrations/V40__Add_unfinished_jobs_index.sql diff --git a/rust/crates/scheduler/Cargo.toml b/rust/crates/scheduler/Cargo.toml index 13b6d1327..1d4f8bc0b 100644 --- a/rust/crates/scheduler/Cargo.toml +++ b/rust/crates/scheduler/Cargo.toml @@ -43,7 +43,7 @@ tonic = { workspace = true } itertools = "0.13.0" humantime = "2.2.0" humantime-serde = "1.1.1" -sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "chrono"] } +sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "chrono", "uuid"] } home = { workspace = true } structopt = { workspace = true } once_cell = "1.13" @@ -63,9 +63,11 @@ rand = "0.8" [features] default = [] smoke-tests = [] +integration-tests = [] [dev-dependencies] tokio-test = "0.4" tracing-test = "0.2" serial_test = "3.0" rand = "0.8" +pg-embed = "1.0" diff --git a/rust/crates/scheduler/resources/migrations b/rust/crates/scheduler/resources/migrations new file mode 120000 index 000000000..8c6270ee4 --- /dev/null +++ b/rust/crates/scheduler/resources/migrations @@ -0,0 +1 @@ +../../../../cuebot/src/main/resources/conf/ddl/postgres/migrations \ No newline at end of file diff --git a/rust/crates/scheduler/resources/schema b/rust/crates/scheduler/resources/schema deleted file mode 100644 index 2173eed09..000000000 --- a/rust/crates/scheduler/resources/schema +++ /dev/null @@ -1,4307 +0,0 @@ --- --- PostgreSQL database dump --- - --- Dumped from database version 15.1 (Debian 15.1-1.pgdg110+1) --- Dumped by pg_dump version 15.13 - --- Started on 2025-09-04 18:51:09 UTC - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- TOC entry 266 (class 1259 OID 16828) --- Name: action; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.action ( - pk_action character varying(36) NOT NULL, - pk_filter character varying(36) NOT NULL, - pk_folder character varying(36), - str_action character varying(24) NOT NULL, - str_value_type character varying(24) NOT NULL, - str_value character varying(4000), - int_value bigint, - b_value boolean, - ts_created timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - float_value numeric(6,2), - b_stop boolean DEFAULT false NOT NULL -); - - -ALTER TABLE public.action OWNER TO cuebot; - --- --- TOC entry 265 (class 1259 OID 16821) --- Name: alloc; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.alloc ( - pk_alloc character varying(36) NOT NULL, - str_name character varying(36) NOT NULL, - b_allow_edit boolean DEFAULT true NOT NULL, - b_default boolean DEFAULT false NOT NULL, - str_tag character varying(24), - b_billable boolean DEFAULT true NOT NULL, - pk_facility character varying(36) NOT NULL, - b_enabled boolean DEFAULT true -); - - -ALTER 
TABLE public.alloc OWNER TO cuebot; - --- --- TOC entry 264 (class 1259 OID 16815) --- Name: comments; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.comments ( - pk_comment character varying(36) NOT NULL, - pk_job character varying(36), - pk_host character varying(36), - ts_created timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - str_user character varying(36) NOT NULL, - str_subject character varying(128) NOT NULL, - str_message character varying(4000) NOT NULL -); - - -ALTER TABLE public.comments OWNER TO cuebot; - --- --- TOC entry 263 (class 1259 OID 16808) --- Name: config; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.config ( - pk_config character varying(36) NOT NULL, - str_key character varying(36) NOT NULL, - int_value bigint DEFAULT 0, - long_value bigint DEFAULT 0, - str_value character varying(255) DEFAULT ''::character varying, - b_value boolean DEFAULT false -); - - -ALTER TABLE public.config OWNER TO cuebot; - --- --- TOC entry 224 (class 1259 OID 16453) --- Name: deed; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.deed ( - pk_deed character varying(36) NOT NULL, - pk_owner character varying(36) NOT NULL, - pk_host character varying(36) NOT NULL, - b_blackout boolean DEFAULT false NOT NULL, - int_blackout_start integer, - int_blackout_stop integer -); - - -ALTER TABLE public.deed OWNER TO cuebot; - --- --- TOC entry 262 (class 1259 OID 16800) --- Name: depend; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.depend ( - pk_depend character varying(36) NOT NULL, - pk_parent character varying(36), - pk_job_depend_on character varying(36) NOT NULL, - pk_job_depend_er character varying(36) NOT NULL, - pk_frame_depend_on character varying(36), - pk_frame_depend_er character varying(36), - pk_layer_depend_on character varying(36), - pk_layer_depend_er character varying(36), - str_type character varying(36) NOT NULL, - b_active boolean DEFAULT true NOT NULL, - b_any boolean DEFAULT false NOT NULL, - ts_created timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - ts_satisfied timestamp(6) without time zone, - str_target character varying(20) DEFAULT 'Internal'::character varying NOT NULL, - str_signature character varying(36) NOT NULL, - b_composite boolean DEFAULT false NOT NULL -); - - -ALTER TABLE public.depend OWNER TO cuebot; - --- --- TOC entry 235 (class 1259 OID 16518) --- Name: dept; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.dept ( - pk_dept character varying(36) NOT NULL, - str_name character varying(36) NOT NULL, - b_default boolean DEFAULT false NOT NULL -); - - -ALTER TABLE public.dept OWNER TO cuebot; - --- --- TOC entry 219 (class 1259 OID 16427) --- Name: duplicate_cursors; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.duplicate_cursors ( - dt_recorded date, - inst_id numeric, - lng_count numeric -); - - -ALTER TABLE public.duplicate_cursors OWNER TO cuebot; - --- --- TOC entry 236 (class 1259 OID 16522) --- Name: facility; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.facility ( - pk_facility character varying(36) NOT NULL, - str_name character varying(36) NOT NULL, - b_default boolean DEFAULT false NOT NULL -); - - -ALTER TABLE public.facility OWNER TO cuebot; - --- --- TOC entry 261 (class 1259 OID 16795) --- Name: filter; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.filter ( - pk_filter character varying(36) NOT NULL, - pk_show character 
varying(36) NOT NULL, - str_name character varying(128) NOT NULL, - str_type character varying(16) NOT NULL, - f_order numeric(6,2) DEFAULT 0.0 NOT NULL, - b_enabled boolean DEFAULT true NOT NULL -); - - -ALTER TABLE public.filter OWNER TO cuebot; - --- --- TOC entry 215 (class 1259 OID 16385) --- Name: flyway_schema_history; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.flyway_schema_history ( - installed_rank integer NOT NULL, - version character varying(50), - description character varying(200) NOT NULL, - type character varying(20) NOT NULL, - script character varying(1000) NOT NULL, - checksum integer, - installed_by character varying(100) NOT NULL, - installed_on timestamp without time zone DEFAULT now() NOT NULL, - execution_time integer NOT NULL, - success boolean NOT NULL -); - - -ALTER TABLE public.flyway_schema_history OWNER TO cuebot; - --- --- TOC entry 260 (class 1259 OID 16783) --- Name: folder; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.folder ( - pk_folder character varying(36) NOT NULL, - pk_parent_folder character varying(36), - pk_show character varying(36) NOT NULL, - str_name character varying(36) NOT NULL, - int_priority bigint DEFAULT 1 NOT NULL, - b_default boolean DEFAULT false NOT NULL, - pk_dept character varying(36) NOT NULL, - int_job_min_cores integer DEFAULT '-1'::integer NOT NULL, - int_job_max_cores integer DEFAULT '-1'::integer NOT NULL, - int_job_priority integer DEFAULT '-1'::integer NOT NULL, - int_min_cores integer DEFAULT 0 NOT NULL, - int_max_cores integer DEFAULT '-1'::integer NOT NULL, - b_exclude_managed boolean DEFAULT false NOT NULL, - f_order integer DEFAULT 0 NOT NULL, - int_job_min_gpus integer DEFAULT '-1'::integer NOT NULL, - int_job_max_gpus integer DEFAULT '-1'::integer NOT NULL, - int_min_gpus integer DEFAULT 0 NOT NULL, - int_max_gpus integer DEFAULT '-1'::integer NOT NULL -); - - -ALTER TABLE public.folder OWNER TO cuebot; - --- --- TOC entry 259 (class 1259 OID 16779) --- Name: folder_level; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.folder_level ( - pk_folder_level character varying(36) NOT NULL, - pk_folder character varying(36) NOT NULL, - int_level bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.folder_level OWNER TO cuebot; - --- --- TOC entry 233 (class 1259 OID 16508) --- Name: folder_resource; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.folder_resource ( - pk_folder_resource character varying(36) NOT NULL, - pk_folder character varying(36) NOT NULL, - int_cores integer DEFAULT 0 NOT NULL, - int_max_cores integer DEFAULT '-1'::integer NOT NULL, - int_min_cores integer DEFAULT 0 NOT NULL, - float_tier numeric(16,2) DEFAULT 0 NOT NULL, - int_gpus integer DEFAULT 0 NOT NULL, - int_max_gpus integer DEFAULT '-1'::integer NOT NULL, - int_min_gpus integer DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.folder_resource OWNER TO cuebot; - --- --- TOC entry 258 (class 1259 OID 16761) --- Name: frame; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.frame ( - pk_frame character varying(36) NOT NULL, - pk_layer character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - str_name character varying(256) NOT NULL, - str_state character varying(24) NOT NULL, - int_number bigint NOT NULL, - int_depend_count bigint DEFAULT 0 NOT NULL, - int_exit_status bigint DEFAULT '-1'::integer NOT NULL, - int_retries bigint DEFAULT 0 NOT NULL, - int_mem_reserved bigint DEFAULT 0 NOT NULL, - int_mem_max_used bigint 
DEFAULT 0 NOT NULL, - int_mem_used bigint DEFAULT 0 NOT NULL, - int_dispatch_order bigint DEFAULT 0 NOT NULL, - str_host character varying(256), - int_cores integer DEFAULT 0 NOT NULL, - int_layer_order integer NOT NULL, - ts_started timestamp(6) with time zone, - ts_stopped timestamp(6) with time zone, - ts_last_run timestamp(6) with time zone, - ts_updated timestamp(6) with time zone, - int_version integer DEFAULT 0, - str_checkpoint_state character varying(12) DEFAULT 'DISABLED'::character varying NOT NULL, - int_checkpoint_count smallint DEFAULT 0 NOT NULL, - int_gpu_mem_reserved bigint DEFAULT 0 NOT NULL, - int_total_past_core_time integer DEFAULT 0 NOT NULL, - ts_llu timestamp(6) with time zone, - int_gpu_mem_used bigint DEFAULT 0 NOT NULL, - int_gpu_mem_max_used bigint DEFAULT 0 NOT NULL, - int_gpus integer DEFAULT 0 NOT NULL, - int_total_past_gpu_time integer DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.frame OWNER TO cuebot; - --- --- TOC entry 217 (class 1259 OID 16408) --- Name: frame_history; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.frame_history ( - pk_frame_history character varying(36) DEFAULT public.uuid_generate_v1() NOT NULL, - pk_frame character varying(36) NOT NULL, - pk_layer character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - str_name character varying(256) NOT NULL, - str_state character varying(24) NOT NULL, - int_mem_reserved bigint DEFAULT 0 NOT NULL, - int_mem_max_used bigint DEFAULT 0 NOT NULL, - int_cores integer DEFAULT 100 NOT NULL, - str_host character varying(64) DEFAULT NULL::character varying, - int_exit_status smallint DEFAULT '-1'::integer NOT NULL, - pk_alloc character varying(36), - int_ts_started integer NOT NULL, - int_ts_stopped integer DEFAULT 0 NOT NULL, - int_checkpoint_count integer DEFAULT 0 NOT NULL, - dt_last_modified date NOT NULL, - int_gpus integer DEFAULT 0 NOT NULL, - int_gpu_mem_reserved bigint DEFAULT 0 NOT NULL, - int_gpu_mem_max_used bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.frame_history OWNER TO cuebot; - --- --- TOC entry 4297 (class 0 OID 0) --- Dependencies: 217 --- Name: COLUMN frame_history.int_mem_reserved; Type: COMMENT; Schema: public; Owner: cuebot --- - -COMMENT ON COLUMN public.frame_history.int_mem_reserved IS 'kilobytes of memory reserved'; - - --- --- TOC entry 4298 (class 0 OID 0) --- Dependencies: 217 --- Name: COLUMN frame_history.int_mem_max_used; Type: COMMENT; Schema: public; Owner: cuebot --- - -COMMENT ON COLUMN public.frame_history.int_mem_max_used IS 'maximum kilobytes of rss memory used'; - - --- --- TOC entry 4299 (class 0 OID 0) --- Dependencies: 217 --- Name: COLUMN frame_history.int_cores; Type: COMMENT; Schema: public; Owner: cuebot --- - -COMMENT ON COLUMN public.frame_history.int_cores IS '100 cores per physical core'; - - --- --- TOC entry 282 (class 1259 OID 17964) --- Name: frame_state_display_overrides; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.frame_state_display_overrides ( - pk_frame_override character varying(36) NOT NULL, - pk_frame character varying(36) NOT NULL, - str_frame_state character varying(24) NOT NULL, - str_override_text character varying(24) NOT NULL, - str_rgb character varying(24) NOT NULL -); - - -ALTER TABLE public.frame_state_display_overrides OWNER TO cuebot; - --- --- TOC entry 218 (class 1259 OID 16421) --- Name: history_period; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.history_period ( - pk character varying(36) DEFAULT public.uuid_generate_v1() NOT 
NULL, - dt_begin date DEFAULT to_date('01-JAN-2000'::text, 'DD-MON-YYYY'::text) NOT NULL, - dt_end date DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE public.history_period OWNER TO cuebot; - --- --- TOC entry 216 (class 1259 OID 16405) --- Name: history_period_bak; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.history_period_bak ( - pk character varying(32), - dt_begin date NOT NULL, - dt_end date NOT NULL -); - - -ALTER TABLE public.history_period_bak OWNER TO cuebot; - --- --- TOC entry 257 (class 1259 OID 16742) --- Name: host; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.host ( - pk_host character varying(36) NOT NULL, - pk_alloc character varying(36) NOT NULL, - str_name character varying(45) NOT NULL, - str_lock_state character varying(36) NOT NULL, - b_nimby boolean DEFAULT false NOT NULL, - ts_created timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - ts_last_updated timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - int_cores bigint DEFAULT 0 NOT NULL, - int_procs bigint DEFAULT 0 NOT NULL, - int_cores_idle bigint DEFAULT 0 NOT NULL, - int_mem bigint DEFAULT 0 NOT NULL, - int_mem_idle bigint DEFAULT 0 NOT NULL, - b_unlock_boot boolean DEFAULT false NOT NULL, - b_unlock_idle boolean DEFAULT false NOT NULL, - b_reboot_idle boolean DEFAULT false NOT NULL, - str_tags character varying(128), - str_fqdn character varying(128), - b_comment boolean DEFAULT false NOT NULL, - int_thread_mode integer DEFAULT 0 NOT NULL, - str_lock_source character varying(128), - int_gpu_mem bigint DEFAULT 0 NOT NULL, - int_gpu_mem_idle bigint DEFAULT 0 NOT NULL, - int_gpus bigint DEFAULT 0 NOT NULL, - int_gpus_idle bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.host OWNER TO cuebot; - --- --- TOC entry 226 (class 1259 OID 16462) --- Name: host_local; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.host_local ( - pk_host_local character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - pk_layer character varying(36), - pk_frame character varying(36), - pk_host character varying(36) NOT NULL, - ts_created timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - ts_updated timestamp(6) with time zone, - int_mem_max bigint DEFAULT 0 NOT NULL, - int_mem_idle bigint DEFAULT 0 NOT NULL, - int_cores_max integer DEFAULT 100 NOT NULL, - int_cores_idle integer DEFAULT 100 NOT NULL, - int_threads integer DEFAULT 1 NOT NULL, - float_tier numeric(16,2) DEFAULT 0 NOT NULL, - b_active boolean DEFAULT true NOT NULL, - str_type character varying(36) NOT NULL, - int_gpu_mem_idle bigint DEFAULT 0 NOT NULL, - int_gpu_mem_max bigint DEFAULT 0 NOT NULL, - int_gpus_idle integer DEFAULT 0 NOT NULL, - int_gpus_max integer DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.host_local OWNER TO cuebot; - --- --- TOC entry 256 (class 1259 OID 16726) --- Name: host_stat; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.host_stat ( - pk_host_stat character varying(36) NOT NULL, - pk_host character varying(36) NOT NULL, - int_mem_total bigint DEFAULT 0 NOT NULL, - int_mem_free bigint DEFAULT 0 NOT NULL, - int_swap_total bigint DEFAULT 0 NOT NULL, - int_swap_free bigint DEFAULT 0 NOT NULL, - int_mcp_total bigint DEFAULT 0 NOT NULL, - int_mcp_free bigint DEFAULT 0 NOT NULL, - int_load bigint DEFAULT 0 NOT NULL, - ts_ping timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - ts_booted timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - str_state character 
varying(32) DEFAULT 'UP'::character varying NOT NULL, - str_os character varying(12) DEFAULT 'rhel40'::character varying NOT NULL, - int_gpu_mem_total bigint DEFAULT 0 NOT NULL, - int_gpu_mem_free bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.host_stat OWNER TO cuebot; - --- --- TOC entry 241 (class 1259 OID 16572) --- Name: host_tag; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.host_tag ( - pk_host_tag character varying(36) NOT NULL, - pk_host character varying(36) NOT NULL, - str_tag character varying(45) NOT NULL, - str_tag_type character varying(24) DEFAULT 'Hardware'::character varying NOT NULL, - b_constant boolean DEFAULT false NOT NULL -); - - -ALTER TABLE public.host_tag OWNER TO cuebot; - --- --- TOC entry 255 (class 1259 OID 16705) --- Name: job; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.job ( - pk_job character varying(36) NOT NULL, - pk_folder character varying(36) NOT NULL, - pk_show character varying(36) NOT NULL, - str_name character varying(255) NOT NULL, - str_visible_name character varying(255), - str_shot character varying(64) NOT NULL, - str_user character varying(32) NOT NULL, - str_state character varying(16) NOT NULL, - str_log_dir character varying(4000) DEFAULT ''::character varying NOT NULL, - int_uid bigint, - b_paused boolean DEFAULT false NOT NULL, - b_autoeat boolean DEFAULT false NOT NULL, - int_frame_count integer DEFAULT 0 NOT NULL, - int_layer_count integer DEFAULT 0 NOT NULL, - int_max_retries smallint DEFAULT 3 NOT NULL, - b_auto_book boolean DEFAULT true NOT NULL, - b_auto_unbook boolean DEFAULT true NOT NULL, - b_comment boolean DEFAULT false NOT NULL, - str_email character varying(256), - pk_facility character varying(36) NOT NULL, - pk_dept character varying(36) NOT NULL, - ts_started timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - ts_stopped timestamp(6) with time zone, - int_min_cores integer DEFAULT 100 NOT NULL, - int_max_cores integer DEFAULT 20000 NOT NULL, - str_show character varying(512) DEFAULT 'none'::character varying NOT NULL, - ts_updated timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - str_os character varying(12), - int_min_gpus integer DEFAULT 0 NOT NULL, - int_max_gpus integer DEFAULT 100000 NOT NULL -); - - -ALTER TABLE public.job OWNER TO cuebot; - --- --- TOC entry 254 (class 1259 OID 16700) --- Name: job_env; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.job_env ( - pk_job_env character varying(36) NOT NULL, - pk_job character varying(36), - str_key character varying(2048), - str_value character varying(2048) -); - - -ALTER TABLE public.job_env OWNER TO cuebot; - --- --- TOC entry 239 (class 1259 OID 16548) --- Name: job_history; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.job_history ( - pk_job character varying(36) NOT NULL, - pk_show character varying(36) NOT NULL, - str_name character varying(512) NOT NULL, - str_shot character varying(64) NOT NULL, - str_user character varying(36) NOT NULL, - int_core_time_success bigint DEFAULT 0 NOT NULL, - int_core_time_fail bigint DEFAULT 0 NOT NULL, - int_frame_count bigint DEFAULT 0 NOT NULL, - int_layer_count bigint DEFAULT 0 NOT NULL, - int_waiting_count bigint DEFAULT 0 NOT NULL, - int_dead_count bigint DEFAULT 0 NOT NULL, - int_depend_count bigint DEFAULT 0 NOT NULL, - int_eaten_count bigint DEFAULT 0 NOT NULL, - int_succeeded_count bigint DEFAULT 0 NOT NULL, - int_running_count bigint DEFAULT 0 NOT NULL, - int_max_rss bigint DEFAULT 0 
NOT NULL, - b_archived boolean DEFAULT false NOT NULL, - pk_facility character varying(36) NOT NULL, - pk_dept character varying(36) NOT NULL, - int_ts_started integer NOT NULL, - int_ts_stopped integer DEFAULT 0 NOT NULL, - dt_last_modified date NOT NULL, - int_gpu_time_success bigint DEFAULT 0 NOT NULL, - int_gpu_time_fail bigint DEFAULT 0 NOT NULL, - int_gpu_mem_max bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.job_history OWNER TO cuebot; - --- --- TOC entry 4300 (class 0 OID 0) --- Dependencies: 239 --- Name: COLUMN job_history.int_core_time_success; Type: COMMENT; Schema: public; Owner: cuebot --- - -COMMENT ON COLUMN public.job_history.int_core_time_success IS 'seconds per core succeeded'; - - --- --- TOC entry 4301 (class 0 OID 0) --- Dependencies: 239 --- Name: COLUMN job_history.int_core_time_fail; Type: COMMENT; Schema: public; Owner: cuebot --- - -COMMENT ON COLUMN public.job_history.int_core_time_fail IS 'seconds per core failed'; - - --- --- TOC entry 4302 (class 0 OID 0) --- Dependencies: 239 --- Name: COLUMN job_history.int_max_rss; Type: COMMENT; Schema: public; Owner: cuebot --- - -COMMENT ON COLUMN public.job_history.int_max_rss IS 'maximum kilobytes of rss memory used by a single frame'; - - --- --- TOC entry 228 (class 1259 OID 16480) --- Name: job_local; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.job_local ( - pk_job_local character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - pk_host character varying(36) NOT NULL, - str_source character varying(255) NOT NULL, - ts_created timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - int_cores integer DEFAULT 0 NOT NULL, - int_max_cores integer NOT NULL, - int_gpus integer DEFAULT 0 NOT NULL, - int_max_gpus integer DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.job_local OWNER TO cuebot; - --- --- TOC entry 232 (class 1259 OID 16503) --- Name: job_mem; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.job_mem ( - pk_job_mem character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - int_max_rss bigint DEFAULT 0 NOT NULL, - int_max_vss bigint DEFAULT 0 NOT NULL, - int_gpu_mem_max bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.job_mem OWNER TO cuebot; - --- --- TOC entry 237 (class 1259 OID 16526) --- Name: job_post; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.job_post ( - pk_job_post character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - pk_post_job character varying(36) NOT NULL -); - - -ALTER TABLE public.job_post OWNER TO cuebot; - --- --- TOC entry 243 (class 1259 OID 16587) --- Name: job_resource; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.job_resource ( - pk_job_resource character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - int_cores bigint DEFAULT 0 NOT NULL, - int_max_rss bigint DEFAULT 0 NOT NULL, - int_max_vss bigint DEFAULT 0 NOT NULL, - int_min_cores integer DEFAULT 100 NOT NULL, - int_max_cores integer DEFAULT 10000 NOT NULL, - float_tier numeric(16,2) DEFAULT 0 NOT NULL, - int_priority integer DEFAULT 1 NOT NULL, - int_local_cores integer DEFAULT 0 NOT NULL, - int_gpus integer DEFAULT 0 NOT NULL, - int_min_gpus integer DEFAULT 0 NOT NULL, - int_max_gpus integer DEFAULT 100 NOT NULL, - int_local_gpus integer DEFAULT 0 NOT NULL, - int_gpu_mem_max bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.job_resource OWNER TO cuebot; - --- --- TOC entry 244 (class 1259 OID 16598) --- Name: job_stat; Type: TABLE; Schema: 
public; Owner: cuebot --- - -CREATE TABLE public.job_stat ( - pk_job_stat character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - int_waiting_count bigint DEFAULT 0 NOT NULL, - int_running_count bigint DEFAULT 0 NOT NULL, - int_dead_count bigint DEFAULT 0 NOT NULL, - int_depend_count bigint DEFAULT 0 NOT NULL, - int_eaten_count bigint DEFAULT 0 NOT NULL, - int_succeeded_count bigint DEFAULT 0 NOT NULL, - int_checkpoint_count bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.job_stat OWNER TO cuebot; - --- --- TOC entry 242 (class 1259 OID 16577) --- Name: job_usage; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.job_usage ( - pk_job_usage character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - int_core_time_success bigint DEFAULT 0 NOT NULL, - int_core_time_fail bigint DEFAULT 0 NOT NULL, - int_frame_success_count integer DEFAULT 0 NOT NULL, - int_frame_fail_count integer DEFAULT 0 NOT NULL, - int_clock_time_fail integer DEFAULT 0 NOT NULL, - int_clock_time_high integer DEFAULT 0 NOT NULL, - int_clock_time_success integer DEFAULT 0 NOT NULL, - int_gpu_time_success bigint DEFAULT 0 NOT NULL, - int_gpu_time_fail bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.job_usage OWNER TO cuebot; - --- --- TOC entry 253 (class 1259 OID 16685) --- Name: layer; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.layer ( - pk_layer character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - str_name character varying(256) NOT NULL, - str_cmd character varying(4000) NOT NULL, - str_range character varying(4000) NOT NULL, - int_chunk_size bigint DEFAULT 1 NOT NULL, - int_dispatch_order bigint DEFAULT 1 NOT NULL, - int_cores_min bigint DEFAULT 100 NOT NULL, - int_mem_min bigint DEFAULT 4194304 NOT NULL, - str_tags character varying(4000) DEFAULT ''::character varying NOT NULL, - str_type character varying(16) NOT NULL, - b_threadable boolean DEFAULT true NOT NULL, - str_services character varying(128) DEFAULT 'default'::character varying NOT NULL, - b_optimize boolean DEFAULT true NOT NULL, - int_cores_max integer DEFAULT 0 NOT NULL, - int_gpu_mem_min bigint DEFAULT 0 NOT NULL, - int_timeout integer DEFAULT 0 NOT NULL, - int_timeout_llu integer DEFAULT 0 NOT NULL, - int_gpus_min bigint DEFAULT 0 NOT NULL, - int_gpus_max bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.layer OWNER TO cuebot; - --- --- TOC entry 252 (class 1259 OID 16680) --- Name: layer_env; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.layer_env ( - pk_layer_env character varying(36) NOT NULL, - pk_layer character varying(36), - pk_job character varying(36), - str_key character varying(2048), - str_value character varying(2048) -); - - -ALTER TABLE public.layer_env OWNER TO cuebot; - --- --- TOC entry 238 (class 1259 OID 16529) --- Name: layer_history; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.layer_history ( - pk_layer character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - str_name character varying(512) NOT NULL, - str_type character varying(16) NOT NULL, - int_cores_min bigint DEFAULT 100 NOT NULL, - int_mem_min bigint DEFAULT 4194304 NOT NULL, - int_core_time_success bigint DEFAULT 0 NOT NULL, - int_core_time_fail bigint DEFAULT 0 NOT NULL, - int_frame_count bigint DEFAULT 0 NOT NULL, - int_layer_count bigint DEFAULT 0 NOT NULL, - int_waiting_count bigint DEFAULT 0 NOT NULL, - int_dead_count bigint DEFAULT 0 NOT NULL, - int_depend_count bigint DEFAULT 0 NOT 
NULL, - int_eaten_count bigint DEFAULT 0 NOT NULL, - int_succeeded_count bigint DEFAULT 0 NOT NULL, - int_running_count bigint DEFAULT 0 NOT NULL, - int_max_rss bigint DEFAULT 0 NOT NULL, - b_archived boolean DEFAULT false NOT NULL, - dt_last_modified date NOT NULL, - str_services character varying(128), - int_gpus_min integer DEFAULT 0 NOT NULL, - int_gpu_time_success bigint DEFAULT 0 NOT NULL, - int_gpu_time_fail bigint DEFAULT 0 NOT NULL, - int_gpu_mem_min bigint DEFAULT 0 NOT NULL, - int_gpu_mem_max bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.layer_history OWNER TO cuebot; - --- --- TOC entry 4303 (class 0 OID 0) --- Dependencies: 238 --- Name: COLUMN layer_history.int_core_time_success; Type: COMMENT; Schema: public; Owner: cuebot --- - -COMMENT ON COLUMN public.layer_history.int_core_time_success IS 'seconds per core succeeded'; - - --- --- TOC entry 4304 (class 0 OID 0) --- Dependencies: 238 --- Name: COLUMN layer_history.int_core_time_fail; Type: COMMENT; Schema: public; Owner: cuebot --- - -COMMENT ON COLUMN public.layer_history.int_core_time_fail IS 'seconds per core failed'; - - --- --- TOC entry 4305 (class 0 OID 0) --- Dependencies: 238 --- Name: COLUMN layer_history.int_max_rss; Type: COMMENT; Schema: public; Owner: cuebot --- - -COMMENT ON COLUMN public.layer_history.int_max_rss IS 'maximum kilobytes of rss memory used by a single frame'; - - --- --- TOC entry 273 (class 1259 OID 17546) --- Name: layer_limit; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.layer_limit ( - pk_layer_limit character varying(36) NOT NULL, - pk_layer character varying(36) NOT NULL, - pk_limit_record character varying(36) NOT NULL -); - - -ALTER TABLE public.layer_limit OWNER TO cuebot; - --- --- TOC entry 231 (class 1259 OID 16498) --- Name: layer_mem; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.layer_mem ( - pk_layer_mem character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - pk_layer character varying(36) NOT NULL, - int_max_rss bigint DEFAULT 0 NOT NULL, - int_max_vss bigint DEFAULT 0 NOT NULL, - int_gpu_mem_max bigint DEFAULT 0 NOT NULL -); - - -ALTER TABLE public.layer_mem OWNER TO cuebot; - --- --- TOC entry 222 (class 1259 OID 16443) --- Name: layer_output; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.layer_output ( - pk_layer_output character varying(36) NOT NULL, - pk_layer character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - str_filespec character varying(2048) NOT NULL, - ser_order integer NOT NULL -); - - -ALTER TABLE public.layer_output OWNER TO cuebot; - --- --- TOC entry 283 (class 1259 OID 17975) --- Name: layer_output_ser_order_seq; Type: SEQUENCE; Schema: public; Owner: cuebot --- - -CREATE SEQUENCE public.layer_output_ser_order_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE public.layer_output_ser_order_seq OWNER TO cuebot; - --- --- TOC entry 4306 (class 0 OID 0) --- Dependencies: 283 --- Name: layer_output_ser_order_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: cuebot --- - -ALTER SEQUENCE public.layer_output_ser_order_seq OWNED BY public.layer_output.ser_order; - - --- --- TOC entry 251 (class 1259 OID 16674) --- Name: layer_resource; Type: TABLE; Schema: public; Owner: cuebot --- - -CREATE TABLE public.layer_resource ( - pk_layer_resource character varying(36) NOT NULL, - pk_layer character varying(36) NOT NULL, - pk_job character varying(36) NOT NULL, - int_cores bigint DEFAULT 0 NOT 
NULL,
-    int_max_rss bigint DEFAULT 0 NOT NULL,
-    int_max_vss bigint DEFAULT 0 NOT NULL,
-    int_gpus integer DEFAULT 0 NOT NULL,
-    int_gpu_mem_max bigint DEFAULT 0 NOT NULL
-);
-
-
-ALTER TABLE public.layer_resource OWNER TO cuebot;
-
---
--- TOC entry 250 (class 1259 OID 16663)
--- Name: layer_stat; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.layer_stat (
-    pk_layer_stat character varying(36) NOT NULL,
-    pk_layer character varying(36) NOT NULL,
-    pk_job character varying(36) NOT NULL,
-    int_total_count bigint DEFAULT 0 NOT NULL,
-    int_waiting_count bigint DEFAULT 0 NOT NULL,
-    int_running_count bigint DEFAULT 0 NOT NULL,
-    int_dead_count bigint DEFAULT 0 NOT NULL,
-    int_depend_count bigint DEFAULT 0 NOT NULL,
-    int_eaten_count bigint DEFAULT 0 NOT NULL,
-    int_succeeded_count bigint DEFAULT 0 NOT NULL,
-    int_checkpoint_count bigint DEFAULT 0 NOT NULL
-);
-
-
-ALTER TABLE public.layer_stat OWNER TO cuebot;
-
---
--- TOC entry 249 (class 1259 OID 16652)
--- Name: layer_usage; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.layer_usage (
-    pk_layer_usage character varying(36) NOT NULL,
-    pk_layer character varying(36) NOT NULL,
-    pk_job character varying(36) NOT NULL,
-    int_core_time_success bigint DEFAULT 0 NOT NULL,
-    int_core_time_fail bigint DEFAULT 0 NOT NULL,
-    int_frame_success_count integer DEFAULT 0 NOT NULL,
-    int_frame_fail_count integer DEFAULT 0 NOT NULL,
-    int_clock_time_fail integer DEFAULT 0 NOT NULL,
-    int_clock_time_high integer DEFAULT 0 NOT NULL,
-    int_clock_time_low integer DEFAULT 0 NOT NULL,
-    int_clock_time_success integer DEFAULT 0 NOT NULL,
-    int_gpu_time_success bigint DEFAULT 0 NOT NULL,
-    int_gpu_time_fail bigint DEFAULT 0 NOT NULL
-);
-
-
-ALTER TABLE public.layer_usage OWNER TO cuebot;
-
---
--- TOC entry 272 (class 1259 OID 17542)
--- Name: limit_record; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.limit_record (
-    pk_limit_record character varying(36) NOT NULL,
-    str_name character varying(255) NOT NULL,
-    int_max_value integer,
-    b_host_limit boolean DEFAULT false NOT NULL
-);
-
-
-ALTER TABLE public.limit_record OWNER TO cuebot;
-
---
--- TOC entry 248 (class 1259 OID 16646)
--- Name: matcher; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.matcher (
-    pk_matcher character varying(36) NOT NULL,
-    pk_filter character varying(36) NOT NULL,
-    str_subject character varying(64) NOT NULL,
-    str_match character varying(64) NOT NULL,
-    str_value character varying(6000) NOT NULL,
-    ts_created timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL
-);
-
-
-ALTER TABLE public.matcher OWNER TO cuebot;
-
---
--- TOC entry 225 (class 1259 OID 16457)
--- Name: owner; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.owner (
-    pk_owner character varying(36) NOT NULL,
-    pk_show character varying(36) NOT NULL,
-    str_username character varying(64) NOT NULL,
-    ts_created timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
-    ts_updated timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL
-);
-
-
-ALTER TABLE public.owner OWNER TO cuebot;
-
---
--- TOC entry 230 (class 1259 OID 16490)
--- Name: point; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.point (
-    pk_point character varying(36) NOT NULL,
-    pk_dept character varying(36) NOT NULL,
-    pk_show character varying(36) NOT NULL,
-    str_ti_task character varying(36),
-    int_cores integer DEFAULT 0 NOT NULL,
-    b_managed boolean DEFAULT false NOT NULL,
-    int_min_cores integer DEFAULT 0 NOT NULL,
-    float_tier numeric(16,2) DEFAULT 0 NOT NULL,
-    ts_updated timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
-    int_gpus integer DEFAULT 0 NOT NULL,
-    int_min_gpus integer DEFAULT 0 NOT NULL
-);
-
-
-ALTER TABLE public.point OWNER TO cuebot;
-
---
--- TOC entry 247 (class 1259 OID 16630)
--- Name: proc; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.proc (
-    pk_proc character varying(36) NOT NULL,
-    pk_host character varying(36) NOT NULL,
-    pk_job character varying(36),
-    pk_show character varying(36),
-    pk_layer character varying(36),
-    pk_frame character varying(36),
-    int_cores_reserved bigint NOT NULL,
-    int_mem_reserved bigint NOT NULL,
-    int_mem_used bigint DEFAULT 0 NOT NULL,
-    int_mem_max_used bigint DEFAULT 0 NOT NULL,
-    b_unbooked boolean DEFAULT false NOT NULL,
-    int_mem_pre_reserved bigint DEFAULT 0 NOT NULL,
-    int_virt_used bigint DEFAULT 0 NOT NULL,
-    int_virt_max_used bigint DEFAULT 0 NOT NULL,
-    str_redirect character varying(265),
-    b_local boolean DEFAULT false NOT NULL,
-    ts_ping timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
-    ts_booked timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
-    ts_dispatched timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
-    int_gpu_mem_reserved bigint DEFAULT 0 NOT NULL,
-    int_gpus_reserved integer DEFAULT 0 NOT NULL,
-    int_gpu_mem_used bigint DEFAULT 0 NOT NULL,
-    int_gpu_mem_max_used bigint DEFAULT 0 NOT NULL,
-    int_gpu_mem_pre_reserved bigint DEFAULT 0 NOT NULL,
-    bytea_children bytea,
-    int_swap_used bigint DEFAULT 0 NOT NULL
-);
-
-
-ALTER TABLE public.proc OWNER TO cuebot;
-
---
--- TOC entry 267 (class 1259 OID 16835)
--- Name: redirect; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.redirect (
-    pk_proc character varying(36) NOT NULL,
-    str_group_id character varying(36) NOT NULL,
-    int_type bigint NOT NULL,
-    str_destination_id character varying(512) NOT NULL,
-    str_name character varying(512) NOT NULL,
-    lng_creation_time bigint NOT NULL
-);
-
-
-ALTER TABLE public.redirect OWNER TO cuebot;
-
---
--- TOC entry 227 (class 1259 OID 16475)
--- Name: service; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.service (
-    pk_service character varying(36) NOT NULL,
-    str_name character varying(36) NOT NULL,
-    b_threadable boolean NOT NULL,
-    int_cores_min integer NOT NULL,
-    int_mem_min integer NOT NULL,
-    str_tags character varying(128) NOT NULL,
-    int_cores_max integer DEFAULT 0 NOT NULL,
-    int_gpu_mem_min bigint DEFAULT 0 NOT NULL,
-    int_timeout integer DEFAULT 0 NOT NULL,
-    int_timeout_llu integer DEFAULT 0 NOT NULL,
-    int_gpus_min integer DEFAULT 0 NOT NULL,
-    int_gpus_max integer DEFAULT 0 NOT NULL,
-    int_min_memory_increase integer DEFAULT 2097152 NOT NULL
-);
-
-
-ALTER TABLE public.service OWNER TO cuebot;
-
---
--- TOC entry 246 (class 1259 OID 16615)
--- Name: show; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.show (
-    pk_show character varying(36) NOT NULL,
-    str_name character varying(512) NOT NULL,
-    b_paused boolean DEFAULT false NOT NULL,
-    int_default_min_cores integer DEFAULT 100 NOT NULL,
-    int_default_max_cores integer DEFAULT 10000 NOT NULL,
-    b_booking_enabled boolean DEFAULT true NOT NULL,
-    b_dispatch_enabled boolean DEFAULT true NOT NULL,
-    b_active boolean DEFAULT true NOT NULL,
-    str_comment_email character varying(1024),
-    int_default_min_gpus integer DEFAULT 100 NOT NULL,
-    int_default_max_gpus integer DEFAULT 100000 NOT NULL
-);
-
-
-ALTER TABLE public.show OWNER TO cuebot;
-
---
--- TOC entry 234 (class 1259 OID 16515)
--- Name: show_alias; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.show_alias (
-    pk_show_alias character varying(36) NOT NULL,
-    pk_show character varying(36) NOT NULL,
-    str_name character varying(16) NOT NULL
-);
-
-
-ALTER TABLE public.show_alias OWNER TO cuebot;
-
---
--- TOC entry 223 (class 1259 OID 16448)
--- Name: show_service; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.show_service (
-    pk_show_service character varying(36) NOT NULL,
-    pk_show character varying(36) NOT NULL,
-    str_name character varying(36) NOT NULL,
-    b_threadable boolean NOT NULL,
-    int_cores_min integer NOT NULL,
-    int_mem_min integer NOT NULL,
-    str_tags character varying(128) NOT NULL,
-    int_cores_max integer DEFAULT 0 NOT NULL,
-    int_gpu_mem_min bigint DEFAULT 0 NOT NULL,
-    int_timeout integer DEFAULT 0 NOT NULL,
-    int_timeout_llu integer DEFAULT 0 NOT NULL,
-    int_gpus_min integer DEFAULT 0 NOT NULL,
-    int_gpus_max integer DEFAULT 0 NOT NULL,
-    int_min_memory_increase integer DEFAULT 2097152 NOT NULL
-);
-
-
-ALTER TABLE public.show_service OWNER TO cuebot;
-
---
--- TOC entry 281 (class 1259 OID 17947)
--- Name: show_stats; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.show_stats (
-    pk_show character varying(36) NOT NULL,
-    int_frame_insert_count bigint DEFAULT 0 NOT NULL,
-    int_job_insert_count bigint DEFAULT 0 NOT NULL,
-    int_frame_success_count bigint DEFAULT 0 NOT NULL,
-    int_frame_fail_count bigint DEFAULT 0 NOT NULL
-);
-
-
-ALTER TABLE public.show_stats OWNER TO cuebot;
-
---
--- TOC entry 245 (class 1259 OID 16608)
--- Name: subscription; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.subscription (
-    pk_subscription character varying(36) NOT NULL,
-    pk_alloc character varying(36) NOT NULL,
-    pk_show character varying(36) NOT NULL,
-    int_size bigint DEFAULT 0 NOT NULL,
-    int_burst bigint DEFAULT 0 NOT NULL,
-    int_cores integer DEFAULT 0 NOT NULL,
-    float_tier numeric(16,2) DEFAULT 0 NOT NULL,
-    int_gpus integer DEFAULT 0 NOT NULL
-);
-
-
-ALTER TABLE public.subscription OWNER TO cuebot;
-
---
--- TOC entry 229 (class 1259 OID 16485)
--- Name: task; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.task (
-    pk_task character varying(36) NOT NULL,
-    pk_point character varying(36) NOT NULL,
-    str_shot character varying(36) NOT NULL,
-    int_min_cores integer DEFAULT 100 NOT NULL,
-    int_adjust_cores integer DEFAULT 0 NOT NULL,
-    int_min_gpus integer DEFAULT 0 NOT NULL,
-    int_adjust_gpus integer DEFAULT 0 NOT NULL
-);
-
-
-ALTER TABLE public.task OWNER TO cuebot;
-
---
--- TOC entry 240 (class 1259 OID 16566)
--- Name: task_lock; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.task_lock (
-    pk_task_lock character varying(36) NOT NULL,
-    str_name character varying(36) NOT NULL,
-    int_lock bigint DEFAULT 0 NOT NULL,
-    int_timeout bigint DEFAULT 30 NOT NULL,
-    ts_lastrun timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL
-);
-
-
-ALTER TABLE public.task_lock OWNER TO cuebot;
-
---
--- TOC entry 221 (class 1259 OID 16437)
--- Name: uncommitted_transactions; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.uncommitted_transactions (
-    inst_id numeric,
-    sid numeric,
-    serial numeric,
-    username character varying(30),
-    machine character varying(64),
-    module character varying(48),
-    service_name character varying(64),
-    duration numeric,
-    dt_recorded date DEFAULT CURRENT_TIMESTAMP
-);
-
-
-ALTER TABLE public.uncommitted_transactions OWNER TO cuebot;
-
---
--- TOC entry 220 (class 1259 OID 16432)
--- Name: uncommitted_transactions_bak; Type: TABLE; Schema: public; Owner: cuebot
---
-
-CREATE TABLE public.uncommitted_transactions_bak (
-    inst_id numeric,
-    sid numeric,
-    serial numeric,
-    username character varying(30),
-    machine character varying(64),
-    module character varying(48),
-    service_name character varying(64),
-    duration numeric,
-    dt_recorded date
-);
-
-
-ALTER TABLE public.uncommitted_transactions_bak OWNER TO cuebot;
-
---
--- TOC entry 278 (class 1259 OID 17924)
--- Name: v_history_frame; Type: VIEW; Schema: public; Owner: cuebot
---
-
-CREATE VIEW public.v_history_frame AS
- SELECT fh.pk_frame_history,
-    fh.pk_frame,
-    fh.pk_layer,
-    fh.pk_job,
-    fh.str_name,
-    fh.str_state,
-    fh.int_mem_reserved,
-    fh.int_mem_max_used,
-    fh.int_cores,
-    fh.int_gpu_mem_reserved,
-    fh.int_gpu_mem_max_used,
-    fh.int_gpus,
-    fh.str_host,
-    fh.int_exit_status,
-    a.str_name AS str_alloc_name,
-    a.b_billable AS b_alloc_billable,
-    f.str_name AS str_facility_name,
-    fh.int_ts_started,
-    fh.int_ts_stopped,
-    fh.int_checkpoint_count,
-    NULL::text AS str_show_name,
-    fh.dt_last_modified
-   FROM (((public.frame_history fh
-     JOIN public.job_history jh ON (((fh.pk_job)::text = (jh.pk_job)::text)))
-     LEFT JOIN public.alloc a ON (((fh.pk_alloc)::text = (a.pk_alloc)::text)))
-     LEFT JOIN public.facility f ON (((a.pk_facility)::text = (f.pk_facility)::text)))
-  WHERE ((fh.dt_last_modified >= ( SELECT history_period.dt_begin
-           FROM public.history_period)) AND (fh.dt_last_modified < ( SELECT history_period.dt_end
-           FROM public.history_period)));
-
-
-ALTER TABLE public.v_history_frame OWNER TO cuebot;
-
---
--- TOC entry 279 (class 1259 OID 17929)
--- Name: v_history_job; Type: VIEW; Schema: public; Owner: cuebot
---
-
-CREATE VIEW public.v_history_job AS
- SELECT jh.pk_job,
-    jh.str_name,
-    jh.str_shot,
-    jh.str_user,
-    jh.int_core_time_success,
-    jh.int_core_time_fail,
-    jh.int_gpu_time_success,
-    jh.int_gpu_time_fail,
-    jh.int_frame_count,
-    jh.int_layer_count,
-    jh.int_waiting_count,
-    jh.int_dead_count,
-    jh.int_depend_count,
-    jh.int_eaten_count,
-    jh.int_succeeded_count,
-    jh.int_running_count,
-    jh.int_max_rss,
-    jh.int_gpu_mem_max,
-    jh.b_archived,
-    f.str_name AS str_facility_name,
-    d.str_name AS str_dept_name,
-    jh.int_ts_started,
-    jh.int_ts_stopped,
-    s.str_name AS str_show_name,
-    jh.dt_last_modified
-   FROM public.job_history jh,
-    public.show s,
-    public.facility f,
-    public.dept d
-  WHERE (((jh.pk_show)::text = (s.pk_show)::text) AND ((jh.pk_facility)::text = (f.pk_facility)::text) AND ((jh.pk_dept)::text = (d.pk_dept)::text) AND ((jh.dt_last_modified >= ( SELECT history_period.dt_begin
-           FROM public.history_period)) OR (jh.int_ts_stopped = 0)));
-
-
-ALTER TABLE public.v_history_job OWNER TO cuebot;
-
---
--- TOC entry 280 (class 1259 OID 17934)
--- Name: v_history_layer; Type: VIEW; Schema: public; Owner: cuebot
---
-
-CREATE VIEW public.v_history_layer AS
- SELECT lh.pk_layer,
-    lh.pk_job,
-    lh.str_name,
-    lh.str_type,
-    lh.int_cores_min,
-    lh.int_mem_min,
-    lh.int_gpus_min,
-    lh.int_gpu_mem_min,
-    lh.int_core_time_success,
-    lh.int_core_time_fail,
-    lh.int_gpu_time_success,
-    lh.int_gpu_time_fail,
-    lh.int_frame_count,
-    lh.int_layer_count,
-    lh.int_waiting_count,
-    lh.int_dead_count,
-    lh.int_depend_count,
-    lh.int_eaten_count,
-    lh.int_succeeded_count,
-    lh.int_running_count,
-    lh.int_max_rss,
-    lh.int_gpu_mem_max,
-    lh.b_archived,
-    lh.str_services,
-    s.str_name AS str_show_name,
-    lh.dt_last_modified
-   FROM public.layer_history lh,
-    public.job_history jh,
-    public.show s
-  WHERE (((lh.pk_job)::text = (jh.pk_job)::text) AND ((jh.pk_show)::text = (s.pk_show)::text) AND (jh.dt_last_modified >= ( SELECT history_period.dt_begin
-           FROM public.history_period)) AND (jh.dt_last_modified < ( SELECT history_period.dt_end
-           FROM public.history_period)));
-
-
-ALTER TABLE public.v_history_layer OWNER TO cuebot;
-
---
--- TOC entry 276 (class 1259 OID 17914)
--- Name: vs_alloc_usage; Type: VIEW; Schema: public; Owner: cuebot
---
-
-CREATE VIEW public.vs_alloc_usage AS
- SELECT alloc.pk_alloc,
-    COALESCE(sum(host.int_cores), (0)::numeric) AS int_cores,
-    COALESCE(sum(host.int_cores_idle), (0)::numeric) AS int_idle_cores,
-    COALESCE(sum((host.int_cores - host.int_cores_idle)), (0)::numeric) AS int_running_cores,
-    COALESCE(( SELECT sum(host_1.int_cores) AS sum
-           FROM public.host host_1
-          WHERE (((host_1.pk_alloc)::text = (alloc.pk_alloc)::text) AND (((host_1.str_lock_state)::text = 'NIMBY_LOCKED'::text) OR ((host_1.str_lock_state)::text = 'LOCKED'::text)))), (0)::numeric) AS int_locked_cores,
-    COALESCE(( SELECT sum(h.int_cores_idle) AS sum
-           FROM public.host h,
-            public.host_stat hs
-          WHERE (((h.pk_host)::text = (hs.pk_host)::text) AND ((h.pk_alloc)::text = (alloc.pk_alloc)::text) AND ((h.str_lock_state)::text = 'OPEN'::text) AND ((hs.str_state)::text = 'UP'::text))), (0)::numeric) AS int_available_cores,
-    COALESCE(sum(host.int_gpus), (0)::numeric) AS int_gpus,
-    COALESCE(sum(host.int_gpus_idle), (0)::numeric) AS int_idle_gpus,
-    COALESCE(sum((host.int_gpus - host.int_gpus_idle)), (0)::numeric) AS int_running_gpus,
-    COALESCE(( SELECT sum(host_1.int_gpus) AS sum
-           FROM public.host host_1
-          WHERE (((host_1.pk_alloc)::text = (alloc.pk_alloc)::text) AND (((host_1.str_lock_state)::text = 'NIMBY_LOCKED'::text) OR ((host_1.str_lock_state)::text = 'LOCKED'::text)))), (0)::numeric) AS int_locked_gpus,
-    COALESCE(( SELECT sum(h.int_gpus_idle) AS sum
-           FROM public.host h,
-            public.host_stat hs
-          WHERE (((h.pk_host)::text = (hs.pk_host)::text) AND ((h.pk_alloc)::text = (alloc.pk_alloc)::text) AND ((h.str_lock_state)::text = 'OPEN'::text) AND ((hs.str_state)::text = 'UP'::text))), (0)::numeric) AS int_available_gpus,
-    count(host.pk_host) AS int_hosts,
-    ( SELECT count(*) AS count
-           FROM public.host host_1
-          WHERE (((host_1.pk_alloc)::text = (alloc.pk_alloc)::text) AND ((host_1.str_lock_state)::text = 'LOCKED'::text))) AS int_locked_hosts,
-    ( SELECT count(*) AS count
-           FROM public.host h,
-            public.host_stat hs
-          WHERE (((h.pk_host)::text = (hs.pk_host)::text) AND ((h.pk_alloc)::text = (alloc.pk_alloc)::text) AND ((hs.str_state)::text = 'DOWN'::text))) AS int_down_hosts
-   FROM (public.alloc
-     LEFT JOIN public.host ON (((alloc.pk_alloc)::text = (host.pk_alloc)::text)))
-  GROUP BY alloc.pk_alloc;
-
-
-ALTER TABLE public.vs_alloc_usage OWNER TO cuebot;
-
---
--- TOC entry 277 (class 1259 OID 17919)
--- Name: vs_folder_counts; Type: VIEW; Schema: public; Owner: cuebot
---
-
-CREATE VIEW public.vs_folder_counts AS
- SELECT folder.pk_folder,
-    COALESCE(sum(job_stat.int_depend_count), (0)::numeric) AS int_depend_count,
-    COALESCE(sum(job_stat.int_waiting_count), (0)::numeric) AS int_waiting_count,
-    COALESCE(sum(job_stat.int_running_count), (0)::numeric) AS int_running_count,
-    COALESCE(sum(job_stat.int_dead_count), (0)::numeric) AS int_dead_count,
-    COALESCE(sum(job_resource.int_cores), (0)::numeric) AS int_cores,
-    COALESCE(sum(job_resource.int_gpus), (0)::bigint) AS int_gpus,
-    COALESCE(count(job.pk_job), (0)::bigint) AS int_job_count
-   FROM (((public.folder
-     LEFT JOIN public.job ON ((((folder.pk_folder)::text = (job.pk_folder)::text) AND ((job.str_state)::text = 'PENDING'::text))))
-     LEFT JOIN public.job_stat ON (((job.pk_job)::text = (job_stat.pk_job)::text)))
-     LEFT JOIN public.job_resource ON (((job.pk_job)::text = (job_resource.pk_job)::text)))
-  GROUP BY folder.pk_folder;
-
-
-ALTER TABLE public.vs_folder_counts OWNER TO cuebot;
-
---
--- TOC entry 275 (class 1259 OID 17909)
--- Name: vs_job_resource; Type: VIEW; Schema: public; Owner: cuebot
---
-
-CREATE VIEW public.vs_job_resource AS
- SELECT job.pk_job,
-    count(proc.pk_proc) AS int_procs,
-    COALESCE(sum(proc.int_cores_reserved), (0)::numeric) AS int_cores,
-    COALESCE(sum(proc.int_gpus_reserved), (0)::bigint) AS int_gpus,
-    COALESCE(sum(proc.int_mem_reserved), (0)::numeric) AS int_mem_reserved
-   FROM (public.job
-     LEFT JOIN public.proc ON (((proc.pk_job)::text = (job.pk_job)::text)))
-  GROUP BY job.pk_job;
-
-
-ALTER TABLE public.vs_job_resource OWNER TO cuebot;
-
---
--- TOC entry 274 (class 1259 OID 17904)
--- Name: vs_show_resource; Type: VIEW; Schema: public; Owner: cuebot
---
-
-CREATE VIEW public.vs_show_resource AS
- SELECT job.pk_show,
-    sum(job_resource.int_cores) AS int_cores,
-    sum(job_resource.int_gpus) AS int_gpus
-   FROM public.job,
-    public.job_resource
-  WHERE (((job.pk_job)::text = (job_resource.pk_job)::text) AND ((job.str_state)::text = 'PENDING'::text))
-  GROUP BY job.pk_show;
-
-
-ALTER TABLE public.vs_show_resource OWNER TO cuebot;
-
---
--- TOC entry 268 (class 1259 OID 17122)
--- Name: vs_show_stat; Type: VIEW; Schema: public; Owner: cuebot
---
-
-CREATE VIEW public.vs_show_stat AS
- SELECT job.pk_show,
-    sum((job_stat.int_waiting_count + job_stat.int_depend_count)) AS int_pending_count,
-    sum(job_stat.int_running_count) AS int_running_count,
-    sum(job_stat.int_dead_count) AS int_dead_count,
-    count(1) AS int_job_count
-   FROM public.job_stat,
-    public.job
-  WHERE (((job_stat.pk_job)::text = (job.pk_job)::text) AND ((job.str_state)::text = 'PENDING'::text))
-  GROUP BY job.pk_show;
-
-
-ALTER TABLE public.vs_show_stat OWNER TO cuebot;
-
---
--- TOC entry 269 (class 1259 OID 17142)
--- Name: vs_waiting; Type: VIEW; Schema: public; Owner: cuebot
---
-
-CREATE VIEW public.vs_waiting AS
- SELECT job.pk_show
-   FROM public.job_resource jr,
-    public.job_stat,
-    public.job
-  WHERE (((job_stat.pk_job)::text = (job.pk_job)::text) AND ((jr.pk_job)::text = (job.pk_job)::text) AND ((job.str_state)::text = 'PENDING'::text) AND (job.b_paused = false) AND ((jr.int_max_cores - jr.int_cores) >= 100) AND (job_stat.int_waiting_count <> 0))
-  GROUP BY job.pk_show;
-
-
-ALTER TABLE public.vs_waiting OWNER TO cuebot;
-
---
--- TOC entry 3457 (class 2604 OID 17976)
--- Name: layer_output ser_order; Type: DEFAULT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer_output ALTER COLUMN ser_order SET DEFAULT nextval('public.layer_output_ser_order_seq'::regclass);
-
-
---
--- TOC entry 4031 (class 2606 OID 17052)
--- Name: action c_action_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.action
-    ADD CONSTRAINT c_action_pk PRIMARY KEY (pk_action);
-
-
---
--- TOC entry 4026 (class 2606 OID 17053)
--- Name: alloc c_alloc_name_uniq; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.alloc
-    ADD CONSTRAINT c_alloc_name_uniq UNIQUE (str_name);
-
-
---
--- TOC entry 4028 (class 2606 OID 17054)
--- Name: alloc c_alloc_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.alloc
-    ADD CONSTRAINT c_alloc_pk PRIMARY KEY (pk_alloc);
-
-
---
--- TOC entry 4022 (class 2606 OID 17055)
--- Name: comments c_comment_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.comments
-    ADD CONSTRAINT c_comment_pk PRIMARY KEY (pk_comment);
-
-
---
--- TOC entry 4005 (class 2606 OID 17058)
--- Name: depend c_depend_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.depend
-    ADD CONSTRAINT c_depend_pk PRIMARY KEY (pk_depend);
-
-
---
--- TOC entry 3835 (class 2606 OID 17106)
--- Name: dept c_dept_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.dept
-    ADD CONSTRAINT c_dept_pk PRIMARY KEY (pk_dept);
-
-
---
--- TOC entry 3837 (class 2606 OID 17105)
--- Name: facility c_facility_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.facility
-    ADD CONSTRAINT c_facility_pk PRIMARY KEY (pk_facility);
-
-
---
--- TOC entry 4002 (class 2606 OID 17059)
--- Name: filter c_filter_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.filter
-    ADD CONSTRAINT c_filter_pk PRIMARY KEY (pk_filter);
-
-
---
--- TOC entry 3991 (class 2606 OID 17062)
--- Name: folder_level c_folder_level_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.folder_level
-    ADD CONSTRAINT c_folder_level_pk PRIMARY KEY (pk_folder_level);
-
-
---
--- TOC entry 3993 (class 2606 OID 17063)
--- Name: folder_level c_folder_level_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.folder_level
-    ADD CONSTRAINT c_folder_level_uk UNIQUE (pk_folder);
-
-
---
--- TOC entry 3995 (class 2606 OID 17061)
--- Name: folder c_folder_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.folder
-    ADD CONSTRAINT c_folder_pk PRIMARY KEY (pk_folder);
-
-
---
--- TOC entry 3825 (class 2606 OID 17108)
--- Name: folder_resource c_folder_resource_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.folder_resource
-    ADD CONSTRAINT c_folder_resource_pk PRIMARY KEY (pk_folder_resource);
-
-
---
--- TOC entry 3997 (class 2606 OID 17060)
--- Name: folder c_folder_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.folder
-    ADD CONSTRAINT c_folder_uk UNIQUE (pk_parent_folder, str_name);
-
-
---
--- TOC entry 3756 (class 2606 OID 17116)
--- Name: frame_history c_frame_history_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.frame_history
-    ADD CONSTRAINT c_frame_history_pk PRIMARY KEY (pk_frame_history);
-
-
---
--- TOC entry 3982 (class 2606 OID 17064)
--- Name: frame c_frame_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.frame
-    ADD CONSTRAINT c_frame_pk PRIMARY KEY (pk_frame);
-
-
---
--- TOC entry 4044 (class 2606 OID 17968)
--- Name: frame_state_display_overrides c_frame_state_override; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.frame_state_display_overrides
-    ADD CONSTRAINT c_frame_state_override UNIQUE (pk_frame, str_frame_state);
-
-
---
--- TOC entry 3984 (class 2606 OID 17065)
--- Name: frame c_frame_str_name_unq; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.frame
-    ADD CONSTRAINT c_frame_str_name_unq UNIQUE (str_name, pk_job);
-
-
---
--- TOC entry 3766 (class 2606 OID 17115)
--- Name: history_period c_history_period_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.history_period
-    ADD CONSTRAINT c_history_period_pk PRIMARY KEY (pk);
-
-
---
--- TOC entry 3967 (class 2606 OID 17067)
--- Name: host c_host_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.host
-    ADD CONSTRAINT c_host_pk PRIMARY KEY (pk_host);
-
-
---
--- TOC entry 3960 (class 2606 OID 17070)
--- Name: host_stat c_host_stat_pk_host_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.host_stat
-    ADD CONSTRAINT c_host_stat_pk_host_uk UNIQUE (pk_host);
-
-
---
--- TOC entry 3861 (class 2606 OID 17100)
--- Name: host_tag c_host_tag_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.host_tag
-    ADD CONSTRAINT c_host_tag_pk PRIMARY KEY (pk_host_tag);
-
-
---
--- TOC entry 3969 (class 2606 OID 17574)
--- Name: host c_host_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.host
-    ADD CONSTRAINT c_host_uk UNIQUE (str_name);
-
-
---
--- TOC entry 3962 (class 2606 OID 17069)
--- Name: host_stat c_hoststat_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.host_stat
-    ADD CONSTRAINT c_hoststat_pk PRIMARY KEY (pk_host_stat);
-
-
---
--- TOC entry 3942 (class 2606 OID 17073)
--- Name: job_env c_job_env_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job_env
-    ADD CONSTRAINT c_job_env_pk PRIMARY KEY (pk_job_env);
-
-
---
--- TOC entry 3849 (class 2606 OID 17102)
--- Name: job_history c_job_history_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job_history
-    ADD CONSTRAINT c_job_history_pk PRIMARY KEY (pk_job);
-
-
---
--- TOC entry 3821 (class 2606 OID 17109)
--- Name: job_mem c_job_mem_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job_mem
-    ADD CONSTRAINT c_job_mem_pk PRIMARY KEY (pk_job_mem);
-
-
---
--- TOC entry 3945 (class 2606 OID 17071)
--- Name: job c_job_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job
-    ADD CONSTRAINT c_job_pk PRIMARY KEY (pk_job);
-
-
---
--- TOC entry 3839 (class 2606 OID 17104)
--- Name: job_post c_job_post_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job_post
-    ADD CONSTRAINT c_job_post_pk PRIMARY KEY (pk_job_post);
-
-
---
--- TOC entry 3869 (class 2606 OID 17096)
--- Name: job_resource c_job_resource_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job_resource
-    ADD CONSTRAINT c_job_resource_pk PRIMARY KEY (pk_job_resource);
-
-
---
--- TOC entry 3871 (class 2606 OID 17097)
--- Name: job_resource c_job_resource_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job_resource
-    ADD CONSTRAINT c_job_resource_uk UNIQUE (pk_job);
-
-
---
--- TOC entry 3881 (class 2606 OID 17095)
--- Name: job_stat c_job_stat_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job_stat
-    ADD CONSTRAINT c_job_stat_pk PRIMARY KEY (pk_job_stat);
-
-
---
--- TOC entry 3947 (class 2606 OID 17072)
--- Name: job c_job_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job
-    ADD CONSTRAINT c_job_uk UNIQUE (str_visible_name);
-
-
---
--- TOC entry 3865 (class 2606 OID 17098)
--- Name: job_usage c_job_usage_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job_usage
-    ADD CONSTRAINT c_job_usage_pk PRIMARY KEY (pk_job_usage);
-
-
---
--- TOC entry 3867 (class 2606 OID 17099)
--- Name: job_usage c_job_usage_pk_job_uniq; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job_usage
-    ADD CONSTRAINT c_job_usage_pk_job_uniq UNIQUE (pk_job);
-
-
---
--- TOC entry 3920 (class 2606 OID 17083)
--- Name: layer_env c_layer_env_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer_env
-    ADD CONSTRAINT c_layer_env_pk PRIMARY KEY (pk_layer_env);
-
-
---
--- TOC entry 3843 (class 2606 OID 17103)
--- Name: layer_history c_layer_history_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer_history
-    ADD CONSTRAINT c_layer_history_pk PRIMARY KEY (pk_layer);
-
-
---
--- TOC entry 3816 (class 2606 OID 17110)
--- Name: layer_mem c_layer_mem_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer_mem
-    ADD CONSTRAINT c_layer_mem_pk PRIMARY KEY (pk_layer_mem);
-
-
---
--- TOC entry 3924 (class 2606 OID 17081)
--- Name: layer c_layer_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer
-    ADD CONSTRAINT c_layer_pk PRIMARY KEY (pk_layer);
-
-
---
--- TOC entry 3926 (class 2606 OID 17082)
--- Name: layer c_layer_str_name_unq; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer
-    ADD CONSTRAINT c_layer_str_name_unq UNIQUE (str_name, pk_job);
-
-
---
--- TOC entry 3905 (class 2606 OID 17087)
--- Name: layer_usage c_layer_usage_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer_usage
-    ADD CONSTRAINT c_layer_usage_pk PRIMARY KEY (pk_layer_usage);
-
-
---
--- TOC entry 3907 (class 2606 OID 17088)
--- Name: layer_usage c_layer_usage_pk_layer_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer_usage
-    ADD CONSTRAINT c_layer_usage_pk_layer_uk UNIQUE (pk_layer);
-
-
---
--- TOC entry 3915 (class 2606 OID 17084)
--- Name: layer_resource c_layerresource_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer_resource
-    ADD CONSTRAINT c_layerresource_pk PRIMARY KEY (pk_layer_resource);
-
-
---
--- TOC entry 3917 (class 2606 OID 17085)
--- Name: layer_resource c_layerresource_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer_resource
-    ADD CONSTRAINT c_layerresource_uk UNIQUE (pk_layer);
-
-
---
--- TOC entry 3910 (class 2606 OID 17086)
--- Name: layer_stat c_layerstat_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer_stat
-    ADD CONSTRAINT c_layerstat_pk PRIMARY KEY (pk_layer_stat);
-
-
---
--- TOC entry 3902 (class 2606 OID 17089)
--- Name: matcher c_matcher_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.matcher
-    ADD CONSTRAINT c_matcher_pk PRIMARY KEY (pk_matcher);
-
-
---
--- TOC entry 3778 (class 2606 OID 17078)
--- Name: deed c_pk_deed; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.deed
-    ADD CONSTRAINT c_pk_deed PRIMARY KEY (pk_deed);
-
-
---
--- TOC entry 3786 (class 2606 OID 17076)
--- Name: host_local c_pk_host_local; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.host_local
-    ADD CONSTRAINT c_pk_host_local PRIMARY KEY (pk_host_local);
-
-
---
--- TOC entry 3800 (class 2606 OID 17074)
--- Name: job_local c_pk_job_local; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.job_local
-    ADD CONSTRAINT c_pk_job_local PRIMARY KEY (pk_job_local);
-
-
---
--- TOC entry 3768 (class 2606 OID 17080)
--- Name: layer_output c_pk_layer_output; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.layer_output
-    ADD CONSTRAINT c_pk_layer_output PRIMARY KEY (pk_layer_output);
-
-
---
--- TOC entry 3782 (class 2606 OID 17077)
--- Name: owner c_pk_owner; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.owner
-    ADD CONSTRAINT c_pk_owner PRIMARY KEY (pk_owner);
-
-
---
--- TOC entry 4018 (class 2606 OID 17056)
--- Name: config c_pk_pkconfig; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.config
-    ADD CONSTRAINT c_pk_pkconfig PRIMARY KEY (pk_config);
-
-
---
--- TOC entry 3795 (class 2606 OID 17075)
--- Name: service c_pk_service; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.service
-    ADD CONSTRAINT c_pk_service PRIMARY KEY (pk_service);
-
-
---
--- TOC entry 3773 (class 2606 OID 17079)
--- Name: show_service c_pk_show_service; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.show_service
-    ADD CONSTRAINT c_pk_show_service PRIMARY KEY (pk_show_service);
-
-
---
--- TOC entry 3809 (class 2606 OID 17111)
--- Name: point c_point_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.point
-    ADD CONSTRAINT c_point_pk PRIMARY KEY (pk_point);
-
-
---
--- TOC entry 3811 (class 2606 OID 17112)
--- Name: point c_point_pk_show_dept; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.point
-    ADD CONSTRAINT c_point_pk_show_dept UNIQUE (pk_show, pk_dept);
-
-
---
--- TOC entry 3893 (class 2606 OID 17090)
--- Name: proc c_proc_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.proc
-    ADD CONSTRAINT c_proc_pk PRIMARY KEY (pk_proc);
-
-
---
--- TOC entry 3895 (class 2606 OID 17091)
--- Name: proc c_proc_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.proc
-    ADD CONSTRAINT c_proc_uk UNIQUE (pk_frame);
-
-
---
--- TOC entry 4035 (class 2606 OID 17117)
--- Name: redirect c_redirect_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.redirect
-    ADD CONSTRAINT c_redirect_pk PRIMARY KEY (pk_proc);
-
-
---
--- TOC entry 3832 (class 2606 OID 17107)
--- Name: show_alias c_show_alias_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.show_alias
-    ADD CONSTRAINT c_show_alias_pk PRIMARY KEY (pk_show_alias);
-
-
---
--- TOC entry 3891 (class 2606 OID 17092)
--- Name: show c_show_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.show
-    ADD CONSTRAINT c_show_pk PRIMARY KEY (pk_show);
-
-
---
--- TOC entry 4042 (class 2606 OID 17955)
--- Name: show_stats c_show_stats_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.show_stats
-    ADD CONSTRAINT c_show_stats_pk PRIMARY KEY (pk_show);
-
-
---
--- TOC entry 4020 (class 2606 OID 17057)
--- Name: config c_show_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.config
-    ADD CONSTRAINT c_show_uk UNIQUE (str_key);
-
-
---
--- TOC entry 3971 (class 2606 OID 17066)
--- Name: host c_str_host_fqdn_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.host
-    ADD CONSTRAINT c_str_host_fqdn_uk UNIQUE (str_fqdn);
-
-
---
--- TOC entry 3885 (class 2606 OID 17093)
--- Name: subscription c_subscription_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.subscription
-    ADD CONSTRAINT c_subscription_pk PRIMARY KEY (pk_subscription);
-
-
---
--- TOC entry 3887 (class 2606 OID 17094)
--- Name: subscription c_subscription_uk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.subscription
-    ADD CONSTRAINT c_subscription_uk UNIQUE (pk_show, pk_alloc);
-
-
---
--- TOC entry 3859 (class 2606 OID 17101)
--- Name: task_lock c_task_lock_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.task_lock
-    ADD CONSTRAINT c_task_lock_pk PRIMARY KEY (pk_task_lock);
-
-
---
--- TOC entry 3804 (class 2606 OID 17113)
--- Name: task c_task_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.task
-    ADD CONSTRAINT c_task_pk PRIMARY KEY (pk_task);
-
-
---
--- TOC entry 3806 (class 2606 OID 17114)
--- Name: task c_task_uniq; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.task
-    ADD CONSTRAINT c_task_uniq UNIQUE (str_shot, pk_point);
-
-
---
--- TOC entry 3753 (class 2606 OID 16392)
--- Name: flyway_schema_history flyway_schema_history_pk; Type: CONSTRAINT; Schema: public; Owner: cuebot
---
-
-ALTER TABLE ONLY public.flyway_schema_history
-    ADD CONSTRAINT flyway_schema_history_pk PRIMARY KEY (installed_rank);
-
-
---
--- TOC entry 3754 (class 1259 OID 16393)
--- Name: flyway_schema_history_s_idx; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX flyway_schema_history_s_idx ON public.flyway_schema_history USING btree (success);
-
-
---
--- TOC entry 4032 (class 1259 OID 16858)
--- Name: i_action_pk_filter; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_action_pk_filter ON public.action USING btree (pk_filter);
-
-
---
--- TOC entry 4033 (class 1259 OID 16859)
--- Name: i_action_pk_group; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_action_pk_group ON public.action USING btree (pk_folder);
-
-
---
--- TOC entry 4029 (class 1259 OID 16861)
--- Name: i_alloc_pk_facility; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_alloc_pk_facility ON public.alloc USING btree (pk_facility);
-
-
---
--- TOC entry 3948 (class 1259 OID 16909)
--- Name: i_booking_3; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_booking_3 ON public.job USING btree (str_state, b_paused, pk_show, pk_facility);
-
-
---
--- TOC entry 4023 (class 1259 OID 16865)
--- Name: i_comment_pk_host; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_comment_pk_host ON public.comments USING btree (pk_host);
-
-
---
--- TOC entry 4024 (class 1259 OID 16864)
--- Name: i_comment_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_comment_pk_job ON public.comments USING btree (pk_job);
-
-
---
--- TOC entry 3779 (class 1259 OID 17030)
--- Name: i_deed_pk_host; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_deed_pk_host ON public.deed USING btree (pk_host);
-
-
---
--- TOC entry 3780 (class 1259 OID 17031)
--- Name: i_deed_pk_owner; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_deed_pk_owner ON public.deed USING btree (pk_owner);
-
-
---
--- TOC entry 4006 (class 1259 OID 16875)
--- Name: i_depend_b_composite; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_depend_b_composite ON public.depend USING btree (b_composite);
-
-
---
--- TOC entry 4007 (class 1259 OID 16874)
--- Name: i_depend_er_frame; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_depend_er_frame ON public.depend USING btree (pk_frame_depend_er);
-
-
---
--- TOC entry 4008 (class 1259 OID 16870)
--- Name: i_depend_er_layer; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_depend_er_layer ON public.depend USING btree (pk_layer_depend_er);
-
-
---
--- TOC entry 4009 (class 1259 OID 16872)
--- Name: i_depend_on_frame; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_depend_on_frame ON public.depend USING btree (pk_frame_depend_on);
-
-
---
--- TOC entry 4010 (class 1259 OID 16869)
--- Name: i_depend_on_layer; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_depend_on_layer ON public.depend USING btree (pk_layer_depend_on);
-
-
---
--- TOC entry 4011 (class 1259 OID 16879)
--- Name: i_depend_pk_er_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_depend_pk_er_job ON public.depend USING btree (pk_job_depend_er);
-
-
---
--- TOC entry 4012 (class 1259 OID 16878)
--- Name: i_depend_pk_on_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_depend_pk_on_job ON public.depend USING btree (pk_job_depend_on);
-
-
---
--- TOC entry 4013 (class 1259 OID 16877)
--- Name: i_depend_pkparent; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_depend_pkparent ON public.depend USING btree (pk_parent);
-
-
---
--- TOC entry 4014 (class 1259 OID 16868)
--- Name: i_depend_signature; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_depend_signature ON public.depend USING btree (str_signature);
-
-
---
--- TOC entry 4015 (class 1259 OID 16871)
--- Name: i_depend_str_target; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_depend_str_target ON public.depend USING btree (str_target);
-
-
---
--- TOC entry 4016 (class 1259 OID 16873)
--- Name: i_depend_str_type; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_depend_str_type ON public.depend USING btree (str_type);
-
-
---
--- TOC entry 4003 (class 1259 OID 16881)
--- Name: i_filters_pk_show; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_filters_pk_show ON public.filter USING btree (pk_show);
-
-
---
--- TOC entry 3998 (class 1259 OID 16883)
--- Name: i_folder_pkparentfolder; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_folder_pkparentfolder ON public.folder USING btree (pk_parent_folder);
-
-
---
--- TOC entry 3999 (class 1259 OID 16884)
--- Name: i_folder_pkshow; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_folder_pkshow ON public.folder USING btree (pk_show);
-
-
---
--- TOC entry 3826 (class 1259 OID 16996)
--- Name: i_folder_res_int_max_cores; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_folder_res_int_max_cores ON public.folder_resource USING btree (int_max_cores);
-
-
---
--- TOC entry 3827 (class 1259 OID 17747)
--- Name: i_folder_res_int_max_gpus; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_folder_res_int_max_gpus ON public.folder_resource USING btree (int_max_gpus);
-
-
---
--- TOC entry 3828 (class 1259 OID 16997)
--- Name: i_folder_resource_fl_tier; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_folder_resource_fl_tier ON public.folder_resource USING btree (float_tier);
-
-
---
--- TOC entry 3829 (class 1259 OID 17961)
--- Name: i_folder_resource_int_cores; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_folder_resource_int_cores ON public.folder_resource USING btree (int_cores);
-
-
---
--- TOC entry 4000 (class 1259 OID 16885)
--- Name: i_folder_strname; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_folder_strname ON public.folder USING btree (str_name);
-
-
---
--- TOC entry 3830 (class 1259 OID 16998)
--- Name: i_folderresource_pkfolder; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_folderresource_pkfolder ON public.folder_resource USING btree (pk_folder);
-
-
---
--- TOC entry 3985 (class 1259 OID 16890)
--- Name: i_frame_dispatch_idx; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_dispatch_idx ON public.frame USING btree (int_dispatch_order, int_layer_order);
-
-
---
--- TOC entry 3757 (class 1259 OID 17040)
--- Name: i_frame_history_int_exit_stat; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_history_int_exit_stat ON public.frame_history USING btree (int_exit_status);
-
-
---
--- TOC entry 3758 (class 1259 OID 17041)
--- Name: i_frame_history_int_ts_stopped; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_history_int_ts_stopped ON public.frame_history USING btree (int_ts_stopped);
-
-
---
--- TOC entry 3759 (class 1259 OID 17042)
--- Name: i_frame_history_pk_alloc; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_history_pk_alloc ON public.frame_history USING btree (pk_alloc);
-
-
---
--- TOC entry 3760 (class 1259 OID 17043)
--- Name: i_frame_history_pk_frame; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_history_pk_frame ON public.frame_history USING btree (pk_frame);
-
-
---
--- TOC entry 3761 (class 1259 OID 17044)
--- Name: i_frame_history_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_history_pk_job ON public.frame_history USING btree (pk_job);
-
-
---
--- TOC entry 3762 (class 1259 OID 17045)
--- Name: i_frame_history_pk_layer; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_history_pk_layer ON public.frame_history USING btree (pk_layer);
-
-
---
--- TOC entry 3763 (class 1259 OID 17046)
--- Name: i_frame_history_str_state; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_history_str_state ON public.frame_history USING btree (str_state);
-
-
---
--- TOC entry 3764 (class 1259 OID 17039)
--- Name: i_frame_history_ts_start_stop; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_history_ts_start_stop ON public.frame_history USING btree (int_ts_started, int_ts_stopped);
-
-
---
--- TOC entry 3986 (class 1259 OID 17883)
--- Name: i_frame_int_gpu_mem_reserved; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_int_gpu_mem_reserved ON public.frame USING btree (int_gpu_mem_reserved);
-
-
---
--- TOC entry 3987 (class 1259 OID 16891)
--- Name: i_frame_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_pk_job ON public.frame USING btree (pk_job);
-
-
---
--- TOC entry 3988 (class 1259 OID 16893)
--- Name: i_frame_pkjoblayer; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_pkjoblayer ON public.frame USING btree (pk_layer);
-
-
---
--- TOC entry 3989 (class 1259 OID 16889)
--- Name: i_frame_state_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_frame_state_job ON public.frame USING btree (str_state, pk_job);
-
-
---
--- TOC entry 3972 (class 1259 OID 17847)
--- Name: i_host_int_gpu; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_int_gpu ON public.host USING btree (int_gpu_mem);
-
-
---
--- TOC entry 3973 (class 1259 OID 17862)
--- Name: i_host_int_gpu_idle; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_int_gpu_idle ON public.host USING btree (int_gpu_mem_idle);
-
-
---
--- TOC entry 3974 (class 1259 OID 17878)
--- Name: i_host_int_gpu_mem; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_int_gpu_mem ON public.host USING btree (int_gpu_mem);
-
-
---
--- TOC entry 3975 (class 1259 OID 17879)
--- Name: i_host_int_gpu_mem_idle; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_int_gpu_mem_idle ON public.host USING btree (int_gpu_mem_idle);
-
-
---
--- TOC entry 3976 (class 1259 OID 17880)
--- Name: i_host_int_gpus; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_int_gpus ON public.host USING btree (int_gpus);
-
-
---
--- TOC entry 3977 (class 1259 OID 17881)
--- Name: i_host_int_gpus_idle; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_int_gpus_idle ON public.host USING btree (int_gpus_idle);
-
-
---
--- TOC entry 3787 (class 1259 OID 17020)
--- Name: i_host_local; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_local ON public.host_local USING btree (pk_host);
-
-
---
--- TOC entry 3788 (class 1259 OID 17702)
--- Name: i_host_local_int_gpu_idle; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_local_int_gpu_idle ON public.host_local USING btree (int_gpu_mem_idle);
-
-
---
--- TOC entry 3789 (class 1259 OID 17713)
--- Name: i_host_local_int_gpu_max; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_local_int_gpu_max ON public.host_local USING btree (int_gpu_mem_max);
-
-
---
--- TOC entry 3790 (class 1259 OID 17725)
--- Name: i_host_local_int_gpus_idle; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_local_int_gpus_idle ON public.host_local USING btree (int_gpus_idle);
-
-
---
--- TOC entry 3791 (class 1259 OID 17726)
--- Name: i_host_local_int_gpus_max; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_local_int_gpus_max ON public.host_local USING btree (int_gpus_max);
-
-
---
--- TOC entry 3792 (class 1259 OID 17022)
--- Name: i_host_local_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_local_pk_job ON public.host_local USING btree (pk_job);
-
-
---
--- TOC entry 3793 (class 1259 OID 17023)
--- Name: i_host_local_unique; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_host_local_unique ON public.host_local USING btree (pk_host, pk_job);
-
-
---
--- TOC entry 3978 (class 1259 OID 16899)
--- Name: i_host_pkalloc; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_pkalloc ON public.host USING btree (pk_alloc);
-
-
---
--- TOC entry 3963 (class 1259 OID 17837)
--- Name: i_host_stat_int_gpu_mem_free; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_stat_int_gpu_mem_free ON public.host_stat USING btree (int_gpu_mem_free);
-
-
---
--- TOC entry 3964 (class 1259 OID 17827)
--- Name: i_host_stat_int_gpu_mem_total; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_stat_int_gpu_mem_total ON public.host_stat USING btree (int_gpu_mem_total);
-
-
---
--- TOC entry 3965 (class 1259 OID 16906)
--- Name: i_host_stat_str_os; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_stat_str_os ON public.host_stat USING btree (str_os);
-
-
---
--- TOC entry 3862 (class 1259 OID 16972)
--- Name: i_host_str_tag_type; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_str_tag_type ON public.host_tag USING btree (str_tag_type);
-
-
---
--- TOC entry 3979 (class 1259 OID 16903)
--- Name: i_host_str_tags; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_str_tags ON public.host USING btree (str_tags);
-
-
---
--- TOC entry 3980 (class 1259 OID 16900)
--- Name: i_host_strlockstate; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_strlockstate ON public.host USING btree (str_lock_state);
-
-
---
--- TOC entry 3863 (class 1259 OID 16971)
--- Name: i_host_tag_pk_host; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_host_tag_pk_host ON public.host_tag USING btree (pk_host);
-
-
---
--- TOC entry 3943 (class 1259 OID 16921)
--- Name: i_job_env_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_env_pk_job ON public.job_env USING btree (pk_job);
-
-
---
--- TOC entry 3850 (class 1259 OID 16976)
--- Name: i_job_history_b_archived; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_history_b_archived ON public.job_history USING btree (b_archived);
-
-
---
--- TOC entry 3851 (class 1259 OID 16981)
--- Name: i_job_history_pk_dept; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_history_pk_dept ON public.job_history USING btree (pk_dept);
-
-
---
--- TOC entry 3852 (class 1259 OID 16982)
--- Name: i_job_history_pk_facility; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_history_pk_facility ON public.job_history USING btree (pk_facility);
-
-
---
--- TOC entry 3853 (class 1259 OID 16975)
--- Name: i_job_history_pk_show; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_history_pk_show ON public.job_history USING btree (pk_show);
-
-
---
--- TOC entry 3854 (class 1259 OID 16978)
--- Name: i_job_history_str_name; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_history_str_name ON public.job_history USING btree (str_name);
-
-
---
--- TOC entry 3855 (class 1259 OID 16979)
--- Name: i_job_history_str_shot; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_history_str_shot ON public.job_history USING btree (str_shot);
-
-
---
--- TOC entry 3856 (class 1259 OID 16980)
--- Name: i_job_history_str_user; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_history_str_user ON public.job_history USING btree (str_user);
-
-
---
--- TOC entry 3857 (class 1259 OID 16977)
--- Name: i_job_history_ts_start_stop; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_history_ts_start_stop ON public.job_history USING btree (int_ts_started, int_ts_stopped);
-
-
---
--- TOC entry 3949 (class 1259 OID 17960)
--- Name: i_job_int_min_cores; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_int_min_cores ON public.job USING btree (int_min_cores);
-
-
---
--- TOC entry 3801 (class 1259 OID 17016)
--- Name: i_job_local_pk_host; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_job_local_pk_host ON public.job_local USING btree (pk_host);
-
-
---
--- TOC entry 3802 (class 1259 OID 17015)
--- Name: i_job_local_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_job_local_pk_job ON public.job_local USING btree (pk_job);
-
-
---
--- TOC entry 3822 (class 1259 OID 17821)
--- Name: i_job_mem_int_max_rss; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_mem_int_max_rss ON public.job_mem USING btree (int_max_rss);
-
-
---
--- TOC entry 3823 (class 1259 OID 17000)
--- Name: i_job_mem_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_job_mem_pk_job ON public.job_mem USING btree (pk_job);
-
-
---
--- TOC entry 3950 (class 1259 OID 16911)
--- Name: i_job_pk_dept; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_pk_dept ON public.job USING btree (pk_dept);
-
-
---
--- TOC entry 3951 (class 1259 OID 16912)
--- Name: i_job_pk_facility; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_pk_facility ON public.job USING btree (pk_facility);
-
-
---
--- TOC entry 3952 (class 1259 OID 16916)
--- Name: i_job_pkgroup; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_pkgroup ON public.job USING btree (pk_folder);
-
-
---
--- TOC entry 3953 (class 1259 OID 16917)
--- Name: i_job_pkshow; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_pkshow ON public.job USING btree (pk_show);
-
-
---
--- TOC entry 3840 (class 1259 OID 16990)
--- Name: i_job_post_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_post_pk_job ON public.job_post USING btree (pk_job);
-
-
---
--- TOC entry 3841 (class 1259 OID 16988)
--- Name: i_job_post_pk_post_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_post_pk_post_job ON public.job_post USING btree (pk_post_job);
-
-
---
--- TOC entry 3872 (class 1259 OID 16966)
--- Name: i_job_resource_cores; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_resource_cores ON public.job_resource USING btree (int_cores);
-
-
---
--- TOC entry 3873 (class 1259 OID 17766)
--- Name: i_job_resource_gpus; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_resource_gpus ON public.job_resource USING btree (int_gpus);
-
-
---
--- TOC entry 3874 (class 1259 OID 17765)
--- Name: i_job_resource_gpus_min_max; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_resource_gpus_min_max ON public.job_resource USING btree (int_min_gpus, int_max_gpus);
-
-
---
--- TOC entry 3875 (class 1259 OID 17959)
--- Name: i_job_resource_int_priority; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_resource_int_priority ON public.job_resource USING btree (int_priority);
-
-
---
--- TOC entry 3876 (class 1259 OID 16967)
--- Name: i_job_resource_max_c; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_resource_max_c ON public.job_resource USING btree (int_max_cores);
-
-
---
--- TOC entry 3877 (class 1259 OID 17767)
--- Name: i_job_resource_max_gpus; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_resource_max_gpus ON public.job_resource USING btree (int_max_gpus);
-
-
---
--- TOC entry 3878 (class 1259 OID 16962)
--- Name: i_job_resource_min_max; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_resource_min_max ON public.job_resource USING btree (int_min_cores, int_max_cores);
-
-
---
--- TOC entry 3882 (class 1259 OID 16959)
--- Name: i_job_stat_int_waiting_count; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_stat_int_waiting_count ON public.job_stat USING btree (int_waiting_count);
-
-
---
--- TOC entry 3883 (class 1259 OID 16960)
--- Name: i_job_stat_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_job_stat_pk_job ON public.job_stat USING btree (pk_job);
-
-
---
--- TOC entry 3954 (class 1259 OID 16918)
--- Name: i_job_str_name; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_str_name ON public.job USING btree (str_name);
-
-
---
--- TOC entry 3955 (class 1259 OID 16910)
--- Name: i_job_str_os; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_str_os ON public.job USING btree (str_os);
-
-
---
--- TOC entry 3956 (class 1259 OID 16913)
--- Name: i_job_str_shot; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_str_shot ON public.job USING btree (str_shot);
-
-
---
--- TOC entry 3957 (class 1259 OID 16919)
--- Name: i_job_str_state; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_str_state ON public.job USING btree (str_state);
-
-
---
--- TOC entry 3879 (class 1259 OID 16963)
--- Name: i_job_tier; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_tier ON public.job_resource USING btree (float_tier);
-
-
---
--- TOC entry 3958 (class 1259 OID 17962)
--- Name: i_job_ts_updated; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_job_ts_updated ON public.job USING btree (ts_updated);
-
-
---
--- TOC entry 3927 (class 1259 OID 16922)
--- Name: i_layer_b_threadable; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_b_threadable ON public.layer USING btree (b_threadable);
-
-
---
--- TOC entry 3928 (class 1259 OID 17818)
--- Name: i_layer_cores_gpus_mem; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_cores_gpus_mem ON public.layer USING btree (int_cores_min, int_gpus_min, int_mem_min, int_gpu_mem_min);
-
-
---
--- TOC entry 3929 (class 1259 OID 17819)
--- Name: i_layer_cores_gpus_mem_thread; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_cores_gpus_mem_thread ON public.layer USING btree (int_cores_min, int_gpus_min, int_mem_min, int_gpu_mem_min, b_threadable);
-
-
---
--- TOC entry 3930 (class 1259 OID 16923)
--- Name: i_layer_cores_mem; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_cores_mem ON public.layer USING btree (int_cores_min, int_mem_min);
-
-
---
--- TOC entry 3931 (class 1259 OID 16924)
--- Name: i_layer_cores_mem_thread; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_cores_mem_thread ON public.layer USING btree (int_cores_min, int_mem_min, b_threadable);
-
-
---
--- TOC entry 3921 (class 1259 OID 16933)
--- Name: i_layer_env_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_env_pk_job ON public.layer_env USING btree (pk_job);
-
-
---
--- TOC entry 3922 (class 1259 OID 16934)
--- Name: i_layer_env_pk_layer; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_env_pk_layer ON public.layer_env USING btree (pk_layer);
-
-
---
--- TOC entry 3844 (class 1259 OID 16987)
--- Name: i_layer_history_b_archived; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_history_b_archived ON public.layer_history USING btree (b_archived);
-
-
---
--- TOC entry 3845 (class 1259 OID 16986)
--- Name: i_layer_history_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_history_pk_job ON public.layer_history USING btree (pk_job);
-
-
---
--- TOC entry 3846 (class 1259 OID 16984)
--- Name: i_layer_history_str_name; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_history_str_name ON public.layer_history USING btree (str_name);
-
-
---
--- TOC entry 3847 (class 1259 OID 16985)
--- Name: i_layer_history_str_type; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_history_str_type ON public.layer_history USING btree (str_type);
-
-
---
--- TOC entry 3932 (class 1259 OID 17958)
--- Name: i_layer_int_cores_max; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_int_cores_max ON public.layer USING btree (int_cores_max);
-
-
---
--- TOC entry 3933 (class 1259 OID 16926)
--- Name: i_layer_int_dispatch_order; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_int_dispatch_order ON public.layer USING btree (int_dispatch_order);
-
-
---
--- TOC entry 3934 (class 1259 OID 17800)
--- Name: i_layer_int_gpu_mem_min; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_int_gpu_mem_min ON public.layer USING btree (int_gpu_mem_min);
-
-
---
--- TOC entry 3935 (class 1259 OID 17957)
--- Name: i_layer_int_gpus_mem_min_1; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_int_gpus_mem_min_1 ON public.layer USING btree (int_gpu_mem_min);
-
-
---
--- TOC entry 3936 (class 1259 OID 17956)
--- Name: i_layer_int_gpus_min; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_int_gpus_min ON public.layer USING btree (int_gpus_min);
-
-
---
--- TOC entry 4039 (class 1259 OID 17942)
--- Name: i_layer_limit_pk_layer; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_limit_pk_layer ON public.layer_limit USING btree (pk_layer);
-
-
---
--- TOC entry 4040 (class 1259 OID 17943)
--- Name: i_layer_limit_pk_limit_record; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_limit_pk_limit_record ON public.layer_limit USING btree (pk_limit_record);
-
-
---
--- TOC entry 3817 (class 1259 OID 17792)
--- Name: i_layer_mem_int_max_rss; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_mem_int_max_rss ON public.layer_mem USING btree (int_max_rss);
-
-
---
--- TOC entry 3937 (class 1259 OID 16925)
--- Name: i_layer_mem_min; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_mem_min ON public.layer USING btree (int_mem_min);
-
-
---
--- TOC entry 3818 (class 1259 OID 17004)
--- Name: i_layer_mem_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_mem_pk_job ON public.layer_mem USING btree (pk_job);
-
-
---
--- TOC entry 3819 (class 1259 OID 17003)
--- Name: i_layer_mem_pk_layer; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_layer_mem_pk_layer ON public.layer_mem USING btree (pk_layer);
-
-
---
--- TOC entry 3769 (class 1259 OID 17037)
--- Name: i_layer_output_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_output_pk_job ON public.layer_output USING btree (pk_job);
-
-
---
--- TOC entry 3770 (class 1259 OID 17036)
--- Name: i_layer_output_pk_layer; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_output_pk_layer ON public.layer_output USING btree (pk_layer);
-
-
---
--- TOC entry 3771 (class 1259 OID 17038)
--- Name: i_layer_output_unique; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_layer_output_unique ON public.layer_output USING btree (pk_layer, str_filespec);
-
-
---
--- TOC entry 3938 (class 1259 OID 16928)
--- Name: i_layer_pkjob; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_pkjob ON public.layer USING btree (pk_job);
-
-
---
--- TOC entry 3918 (class 1259 OID 16937)
--- Name: i_layer_resource_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_resource_pk_job ON public.layer_resource USING btree (pk_job);
-
-
---
--- TOC entry 3911 (class 1259 OID 16938)
--- Name: i_layer_stat_pk_layer; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_layer_stat_pk_layer ON public.layer_stat USING btree (pk_layer);
-
-
---
--- TOC entry 3939 (class 1259 OID 17963)
--- Name: i_layer_str_tags; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_str_tags ON public.layer USING btree (str_tags);
-
-
---
--- TOC entry 3940 (class 1259 OID 16929)
--- Name: i_layer_strname; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_strname ON public.layer USING btree (str_name);
-
-
---
--- TOC entry 3908 (class 1259 OID 16942)
--- Name: i_layer_usage_pk_job; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layer_usage_pk_job ON public.layer_usage USING btree (pk_job);
-
-
---
--- TOC entry 3912 (class 1259 OID 16941)
--- Name: i_layerstat_int_waiting_count; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layerstat_int_waiting_count ON public.layer_stat USING btree ((
-CASE
-    WHEN (int_waiting_count > 0) THEN 1
-    ELSE NULL::integer
-END), (
-CASE
-    WHEN (int_waiting_count > 0) THEN pk_layer
-    ELSE NULL::character varying
-END));
-
-
---
--- TOC entry 3913 (class 1259 OID 16940)
--- Name: i_layerstat_pkjob; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_layerstat_pkjob ON public.layer_stat USING btree (pk_job);
-
-
---
--- TOC entry 4038 (class 1259 OID 17944)
--- Name: i_limit_record_pk_limit_record; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_limit_record_pk_limit_record ON public.limit_record USING btree (pk_limit_record);
-
-
---
--- TOC entry 3903 (class 1259 OID 16946)
--- Name: i_matcher_pk_filter; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_matcher_pk_filter ON public.matcher USING btree (pk_filter);
-
-
---
--- TOC entry 3783 (class 1259 OID 17027)
--- Name: i_owner_pk_show; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_owner_pk_show ON public.owner USING btree (pk_show);
-
-
---
--- TOC entry 3784 (class 1259 OID 17028)
--- Name: i_owner_str_username; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_owner_str_username ON public.owner USING btree (str_username);
-
-
---
--- TOC entry 3812 (class 1259 OID 17007)
--- Name: i_point_pk_dept; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_point_pk_dept ON public.point USING btree (pk_dept);
-
-
---
--- TOC entry 3813 (class 1259 OID 17008)
--- Name: i_point_pk_show; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_point_pk_show ON public.point USING btree (pk_show);
-
-
---
--- TOC entry 3814 (class 1259 OID 17010)
--- Name: i_point_tier; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_point_tier ON public.point USING btree (float_tier);
-
-
---
--- TOC entry 3896 (class 1259 OID 17772)
--- Name: i_proc_int_gpu_mem_reserved; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_proc_int_gpu_mem_reserved ON public.proc USING btree (int_gpu_mem_reserved);
-
-
---
--- TOC entry 3897 (class 1259 OID 16949)
--- Name: i_proc_pkhost; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_proc_pkhost ON public.proc USING btree (pk_host);
-
-
---
--- TOC entry 3898 (class 1259 OID 16950)
--- Name: i_proc_pkjob; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_proc_pkjob ON public.proc USING btree (pk_job);
-
-
---
--- TOC entry 3899 (class 1259 OID 16951)
--- Name: i_proc_pklayer; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_proc_pklayer ON public.proc USING btree (pk_layer);
-
-
---
--- TOC entry 3900 (class 1259 OID 16952)
--- Name: i_proc_pkshow; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_proc_pkshow ON public.proc USING btree (pk_show);
-
-
---
--- TOC entry 4036 (class 1259 OID 17051)
--- Name: i_redirect_create; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_redirect_create ON public.redirect USING btree (lng_creation_time);
-
-
---
--- TOC entry 4037 (class 1259 OID 17050)
--- Name: i_redirect_group; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_redirect_group ON public.redirect USING btree (str_group_id);
-
-
---
--- TOC entry 3796 (class 1259 OID 17728)
--- Name: i_service_int_gpu_mem_min; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_service_int_gpu_mem_min ON public.service USING btree (int_gpu_mem_min);
-
-
---
--- TOC entry 3797 (class 1259 OID 17737)
--- Name: i_service_int_gpus_min; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_service_int_gpus_min ON public.service USING btree (int_gpus_min);
-
-
---
--- TOC entry 3798 (class 1259 OID 17018)
--- Name: i_service_str_name; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_service_str_name ON public.service USING btree (str_name);
-
-
---
--- TOC entry 3833 (class 1259 OID 16993)
--- Name: i_show_alias_pk_show; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_show_alias_pk_show ON public.show_alias USING btree (pk_show);
-
-
---
--- TOC entry 3774 (class 1259 OID 17671)
--- Name: i_show_service_int_gpu_mem_min; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_show_service_int_gpu_mem_min ON public.show_service USING btree (int_gpu_mem_min);
-
-
---
--- TOC entry 3775 (class 1259 OID 17680)
--- Name: i_show_service_int_gpus_min; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_show_service_int_gpus_min ON public.show_service USING btree (int_gpus_min);
-
-
---
--- TOC entry 3776 (class 1259 OID 17033)
--- Name: i_show_service_str_name; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE UNIQUE INDEX i_show_service_str_name ON public.show_service USING btree (str_name, pk_show);
-
-
---
--- TOC entry 3888 (class 1259 OID 16955)
--- Name: i_sub_tier; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_sub_tier ON public.subscription USING btree (float_tier);
-
-
---
--- TOC entry 3889 (class 1259 OID 16958)
--- Name: i_subscription_pkalloc; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_subscription_pkalloc ON public.subscription USING btree (pk_alloc);
-
-
---
--- TOC entry 3807 (class 1259 OID 17012)
--- Name: i_task_pk_point; Type: INDEX; Schema: public; Owner: cuebot
---
-
-CREATE INDEX i_task_pk_point ON public.task USING btree (pk_point);
-
-
---
--- TOC entry 4138 (class 2620 OID 17513)
--- Name: folder after_insert_folder; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER after_insert_folder AFTER INSERT ON public.folder FOR EACH ROW EXECUTE FUNCTION public.trigger__after_insert_folder();
-
-
---
--- TOC entry 4125 (class 2620 OID 17485)
--- Name: job after_insert_job; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER after_insert_job AFTER INSERT ON public.job FOR EACH ROW EXECUTE FUNCTION public.trigger__after_insert_job();
-
-
---
--- TOC entry 4122 (class 2620 OID 17493)
--- Name: layer after_insert_layer; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER after_insert_layer AFTER INSERT ON public.layer FOR EACH ROW EXECUTE FUNCTION public.trigger__after_insert_layer();
-
-
---
--- TOC entry 4126 (class 2620 OID 17487)
--- Name: job after_job_dept_update; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER after_job_dept_update AFTER UPDATE ON public.job FOR EACH ROW WHEN ((((new.pk_dept)::text <> (old.pk_dept)::text) AND ((new.str_state)::text = 'PENDING'::text))) EXECUTE FUNCTION public.trigger__after_job_dept_update();
-
-
---
--- TOC entry 4127 (class 2620 OID 17483)
--- Name: job after_job_finished; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER after_job_finished AFTER UPDATE ON public.job FOR EACH ROW WHEN ((((old.str_state)::text = 'PENDING'::text) AND ((new.str_state)::text = 'FINISHED'::text))) EXECUTE FUNCTION public.trigger__after_job_finished();
-
-
---
--- TOC entry 4128 (class 2620 OID 17479)
--- Name: job after_job_moved; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER after_job_moved AFTER UPDATE ON public.job FOR EACH ROW WHEN (((new.pk_folder)::text <> (old.pk_folder)::text)) EXECUTE FUNCTION public.trigger__after_job_moved();
-
-
---
--- TOC entry 4139 (class 2620 OID 17511)
--- Name: folder before_delete_folder; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER before_delete_folder BEFORE DELETE ON public.folder FOR EACH ROW EXECUTE FUNCTION public.trigger__before_delete_folder();
-
-
---
--- TOC entry 4130 (class 2620 OID 17501)
--- Name: host before_delete_host; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER before_delete_host BEFORE DELETE ON public.host FOR EACH ROW EXECUTE FUNCTION public.trigger__before_delete_host();
-
-
---
--- TOC entry 4129 (class 2620 OID 17481)
--- Name: job before_delete_job; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER before_delete_job BEFORE DELETE ON public.job FOR EACH ROW EXECUTE FUNCTION public.trigger__before_delete_job();
-
-
---
--- TOC entry 4123 (class 2620 OID 17495)
--- Name: layer before_delete_layer; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER before_delete_layer BEFORE DELETE ON public.layer FOR EACH ROW EXECUTE FUNCTION public.trigger__before_delete_layer();
-
-
---
--- TOC entry 4124 (class 2620 OID 17550)
--- Name: layer before_delete_layer_drop_limit; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER before_delete_layer_drop_limit BEFORE DELETE ON public.layer FOR EACH ROW EXECUTE FUNCTION public.trigger__before_delete_layer_drop_limit();
-
-
---
--- TOC entry 4140 (class 2620 OID 17515)
--- Name: folder before_insert_folder; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER before_insert_folder BEFORE INSERT ON public.folder FOR EACH ROW EXECUTE FUNCTION public.trigger__before_insert_folder();
-
-
---
--- TOC entry 4119 (class 2620 OID 17517)
--- Name: proc before_insert_proc; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER before_insert_proc BEFORE INSERT ON public.proc FOR EACH ROW EXECUTE FUNCTION public.trigger__before_insert_proc();
-
-
---
--- TOC entry 4132 (class 2620 OID 17529)
--- Name: frame frame_history_open; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER frame_history_open AFTER UPDATE ON public.frame FOR EACH ROW WHEN (((new.str_state)::text <> (old.str_state)::text)) EXECUTE FUNCTION public.trigger__frame_history_open();
-
-
---
--- TOC entry 4111 (class 2620 OID 17539)
--- Name: point point_tier; Type: TRIGGER; Schema: public; Owner: cuebot
---
-
-CREATE TRIGGER point_tier BEFORE UPDATE ON public.point FOR EACH ROW EXECUTE FUNCTION public.trigger__point_tier();
-
-
---
--- TOC entry 4107 (class 2620 OID 17541)
--- Name:
frame_history tbiu_frame_history; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER tbiu_frame_history BEFORE INSERT OR UPDATE ON public.frame_history FOR EACH ROW EXECUTE FUNCTION public.trigger__tbiu_frame_history(); - - --- --- TOC entry 4114 (class 2620 OID 17497) --- Name: job_history tbiu_job_history; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER tbiu_job_history BEFORE INSERT OR UPDATE ON public.job_history FOR EACH ROW EXECUTE FUNCTION public.trigger__tbiu_job_history(); - - --- --- TOC entry 4113 (class 2620 OID 17477) --- Name: layer_history tbiu_layer_history; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER tbiu_layer_history BEFORE INSERT OR UPDATE ON public.layer_history FOR EACH ROW EXECUTE FUNCTION public.trigger__tbiu_layer_history(); - - --- --- TOC entry 4112 (class 2620 OID 17509) --- Name: folder_resource tier_folder; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER tier_folder BEFORE UPDATE ON public.folder_resource FOR EACH ROW EXECUTE FUNCTION public.trigger__tier_folder(); - - --- --- TOC entry 4108 (class 2620 OID 17491) --- Name: host_local tier_host_local; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER tier_host_local BEFORE UPDATE ON public.host_local FOR EACH ROW EXECUTE FUNCTION public.trigger__tier_host_local(); - - --- --- TOC entry 4115 (class 2620 OID 17505) --- Name: job_resource tier_job; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER tier_job BEFORE UPDATE ON public.job_resource FOR EACH ROW EXECUTE FUNCTION public.trigger__tier_job(); - - --- --- TOC entry 4117 (class 2620 OID 17537) --- Name: subscription tier_subscription; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER tier_subscription BEFORE UPDATE ON public.subscription FOR EACH ROW EXECUTE FUNCTION public.trigger__tier_subscription(); - - --- --- TOC entry 4133 (class 2620 OID 17531) --- Name: frame update_frame_checkpoint_state; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER update_frame_checkpoint_state BEFORE UPDATE ON public.frame FOR EACH ROW WHEN ((((new.str_state)::text = 'WAITING'::text) AND ((old.str_state)::text = 'RUNNING'::text) AND ((new.str_checkpoint_state)::text = ANY ((ARRAY['ENABLED'::character varying, 'COPYING'::character varying])::text[])))) EXECUTE FUNCTION public.trigger__update_frame_checkpoint_state(); - - --- --- TOC entry 4134 (class 2620 OID 17527) --- Name: frame update_frame_dep_to_wait; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER update_frame_dep_to_wait BEFORE UPDATE ON public.frame FOR EACH ROW WHEN (((old.int_depend_count > 0) AND (new.int_depend_count < 1) AND ((old.str_state)::text = 'DEPEND'::text))) EXECUTE FUNCTION public.trigger__update_frame_dep_to_wait(); - - --- --- TOC entry 4135 (class 2620 OID 17525) --- Name: frame update_frame_eaten; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER update_frame_eaten BEFORE UPDATE ON public.frame FOR EACH ROW WHEN ((((new.str_state)::text = 'EATEN'::text) AND ((old.str_state)::text = 'SUCCEEDED'::text))) EXECUTE FUNCTION public.trigger__update_frame_eaten(); - - --- --- TOC entry 4136 (class 2620 OID 17533) --- Name: frame update_frame_status_counts; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER update_frame_status_counts AFTER UPDATE ON public.frame FOR EACH ROW WHEN ((((old.str_state)::text <> 'SETUP'::text) AND ((old.str_state)::text <> (new.str_state)::text))) EXECUTE FUNCTION 
public.trigger__update_frame_status_counts(); - - --- --- TOC entry 4137 (class 2620 OID 17523) --- Name: frame update_frame_wait_to_dep; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER update_frame_wait_to_dep BEFORE UPDATE ON public.frame FOR EACH ROW WHEN (((new.int_depend_count > 0) AND ((new.str_state)::text = ANY ((ARRAY['DEAD'::character varying, 'SUCCEEDED'::character varying, 'WAITING'::character varying, 'CHECKPOINT'::character varying])::text[])))) EXECUTE FUNCTION public.trigger__update_frame_wait_to_dep(); - - --- --- TOC entry 4120 (class 2620 OID 17519) --- Name: proc update_proc_update_layer; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER update_proc_update_layer AFTER UPDATE ON public.proc FOR EACH ROW WHEN (((new.pk_layer)::text <> (old.pk_layer)::text)) EXECUTE FUNCTION public.trigger__update_proc_update_layer(); - - --- --- TOC entry 4121 (class 2620 OID 17521) --- Name: proc upgrade_proc_memory_usage; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER upgrade_proc_memory_usage AFTER UPDATE ON public.proc FOR EACH ROW WHEN ((new.int_mem_reserved <> old.int_mem_reserved)) EXECUTE FUNCTION public.trigger__upgrade_proc_memory_usage(); - - --- --- TOC entry 4109 (class 2620 OID 17939) --- Name: host_local verify_host_local; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER verify_host_local BEFORE UPDATE ON public.host_local FOR EACH ROW WHEN (((new.int_cores_max = old.int_cores_max) AND (new.int_mem_max = old.int_mem_max) AND ((new.int_cores_idle <> old.int_cores_idle) OR (new.int_mem_idle <> old.int_mem_idle)) AND ((new.int_gpus_max = old.int_gpus_max) AND (new.int_gpu_mem_max = old.int_gpu_mem_max)) AND ((new.int_gpus_idle <> old.int_gpus_idle) OR (new.int_gpu_mem_idle <> old.int_gpu_mem_idle)))) EXECUTE FUNCTION public.trigger__verify_host_local(); - - --- --- TOC entry 4131 (class 2620 OID 17940) --- Name: host verify_host_resources; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER verify_host_resources BEFORE UPDATE ON public.host FOR EACH ROW WHEN (((new.int_cores_idle <> old.int_cores_idle) OR (new.int_mem_idle <> old.int_mem_idle) OR (new.int_gpus_idle <> old.int_gpus_idle) OR (new.int_gpu_mem_idle <> old.int_gpu_mem_idle))) EXECUTE FUNCTION public.trigger__verify_host_resources(); - - --- --- TOC entry 4110 (class 2620 OID 17507) --- Name: job_local verify_job_local; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER verify_job_local BEFORE UPDATE ON public.job_local FOR EACH ROW WHEN (((new.int_max_cores = old.int_max_cores) AND (new.int_cores > old.int_cores))) EXECUTE FUNCTION public.trigger__verify_job_local(); - - --- --- TOC entry 4116 (class 2620 OID 17941) --- Name: job_resource verify_job_resources; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER verify_job_resources BEFORE UPDATE ON public.job_resource FOR EACH ROW WHEN ((((new.int_max_cores = old.int_max_cores) AND (new.int_cores > old.int_cores)) OR ((new.int_max_gpus = old.int_max_gpus) AND (new.int_gpus > old.int_gpus)))) EXECUTE FUNCTION public.trigger__verify_job_resources(); - - --- --- TOC entry 4118 (class 2620 OID 17535) --- Name: subscription verify_subscription; Type: TRIGGER; Schema: public; Owner: cuebot --- - -CREATE TRIGGER verify_subscription BEFORE UPDATE ON public.subscription FOR EACH ROW WHEN (((new.int_burst = old.int_burst) AND (new.int_cores > old.int_cores))) EXECUTE FUNCTION public.trigger__verify_subscription(); - - --- --- TOC entry 4104 (class 2606 OID 
17165) --- Name: action c_action_pk_filter; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.action - ADD CONSTRAINT c_action_pk_filter FOREIGN KEY (pk_filter) REFERENCES public.filter(pk_filter); - - --- --- TOC entry 4105 (class 2606 OID 17170) --- Name: action c_action_pk_folder; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.action - ADD CONSTRAINT c_action_pk_folder FOREIGN KEY (pk_folder) REFERENCES public.folder(pk_folder); - - --- --- TOC entry 4103 (class 2606 OID 17175) --- Name: alloc c_alloc_pk_facility; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.alloc - ADD CONSTRAINT c_alloc_pk_facility FOREIGN KEY (pk_facility) REFERENCES public.facility(pk_facility); - - --- --- TOC entry 4101 (class 2606 OID 17180) --- Name: comments c_comment_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.comments - ADD CONSTRAINT c_comment_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); - - --- --- TOC entry 4102 (class 2606 OID 17185) --- Name: comments c_comment_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.comments - ADD CONSTRAINT c_comment_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4051 (class 2606 OID 17435) --- Name: deed c_deed_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.deed - ADD CONSTRAINT c_deed_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); - - --- --- TOC entry 4100 (class 2606 OID 17190) --- Name: filter c_filter_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.filter - ADD CONSTRAINT c_filter_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); - - --- --- TOC entry 4097 (class 2606 OID 17205) --- Name: folder_level c_folder_level_pk_folder; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.folder_level - ADD CONSTRAINT c_folder_level_pk_folder FOREIGN KEY (pk_folder) REFERENCES public.folder(pk_folder); - - --- --- TOC entry 4098 (class 2606 OID 17200) --- Name: folder c_folder_pk_dept; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.folder - ADD CONSTRAINT c_folder_pk_dept FOREIGN KEY (pk_dept) REFERENCES public.dept(pk_dept); - - --- --- TOC entry 4099 (class 2606 OID 17195) --- Name: folder c_folder_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.folder - ADD CONSTRAINT c_folder_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); - - --- --- TOC entry 4063 (class 2606 OID 17375) --- Name: folder_resource c_folder_resource_pk_folder; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.folder_resource - ADD CONSTRAINT c_folder_resource_pk_folder FOREIGN KEY (pk_folder) REFERENCES public.folder(pk_folder); - - --- --- TOC entry 4045 (class 2606 OID 17465) --- Name: frame_history c_frame_history_pk_alloc; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.frame_history - ADD CONSTRAINT c_frame_history_pk_alloc FOREIGN KEY (pk_alloc) REFERENCES public.alloc(pk_alloc); - - --- --- TOC entry 4046 (class 2606 OID 17455) --- Name: frame_history c_frame_history_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.frame_history - ADD CONSTRAINT c_frame_history_pk_job FOREIGN KEY (pk_job) REFERENCES public.job_history(pk_job) 
ON DELETE CASCADE; - - --- --- TOC entry 4047 (class 2606 OID 17460) --- Name: frame_history c_frame_history_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.frame_history - ADD CONSTRAINT c_frame_history_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer_history(pk_layer) ON DELETE CASCADE; - - --- --- TOC entry 4095 (class 2606 OID 17210) --- Name: frame c_frame_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.frame - ADD CONSTRAINT c_frame_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4096 (class 2606 OID 17215) --- Name: frame c_frame_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.frame - ADD CONSTRAINT c_frame_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); - - --- --- TOC entry 4106 (class 2606 OID 17969) --- Name: frame_state_display_overrides c_frame_state_overrides_pk_frame; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.frame_state_display_overrides - ADD CONSTRAINT c_frame_state_overrides_pk_frame FOREIGN KEY (pk_frame) REFERENCES public.frame(pk_frame); - - --- --- TOC entry 4053 (class 2606 OID 17425) --- Name: host_local c_host_local_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.host_local - ADD CONSTRAINT c_host_local_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); - - --- --- TOC entry 4054 (class 2606 OID 17420) --- Name: host_local c_host_local_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.host_local - ADD CONSTRAINT c_host_local_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4094 (class 2606 OID 17220) --- Name: host c_host_pk_alloc; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.host - ADD CONSTRAINT c_host_pk_alloc FOREIGN KEY (pk_alloc) REFERENCES public.alloc(pk_alloc); - - --- --- TOC entry 4093 (class 2606 OID 17225) --- Name: host_stat c_host_stat_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.host_stat - ADD CONSTRAINT c_host_stat_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); - - --- --- TOC entry 4088 (class 2606 OID 17250) --- Name: job_env c_job_env_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_env - ADD CONSTRAINT c_job_env_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4068 (class 2606 OID 17345) --- Name: job_history c_job_history_pk_dept; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_history - ADD CONSTRAINT c_job_history_pk_dept FOREIGN KEY (pk_dept) REFERENCES public.dept(pk_dept); - - --- --- TOC entry 4069 (class 2606 OID 17340) --- Name: job_history c_job_history_pk_facility; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_history - ADD CONSTRAINT c_job_history_pk_facility FOREIGN KEY (pk_facility) REFERENCES public.facility(pk_facility); - - --- --- TOC entry 4070 (class 2606 OID 17350) --- Name: job_history c_job_history_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_history - ADD CONSTRAINT c_job_history_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); - - --- --- TOC entry 4055 (class 2606 OID 17415) --- Name: job_local c_job_local_pk_host; Type: FK CONSTRAINT; Schema: public; 
Owner: cuebot --- - -ALTER TABLE ONLY public.job_local - ADD CONSTRAINT c_job_local_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); - - --- --- TOC entry 4056 (class 2606 OID 17410) --- Name: job_local c_job_local_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_local - ADD CONSTRAINT c_job_local_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4062 (class 2606 OID 17380) --- Name: job_mem c_job_mem_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_mem - ADD CONSTRAINT c_job_mem_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4089 (class 2606 OID 17245) --- Name: job c_job_pk_dept; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job - ADD CONSTRAINT c_job_pk_dept FOREIGN KEY (pk_dept) REFERENCES public.dept(pk_dept); - - --- --- TOC entry 4090 (class 2606 OID 17240) --- Name: job c_job_pk_facility; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job - ADD CONSTRAINT c_job_pk_facility FOREIGN KEY (pk_facility) REFERENCES public.facility(pk_facility); - - --- --- TOC entry 4091 (class 2606 OID 17235) --- Name: job c_job_pk_folder; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job - ADD CONSTRAINT c_job_pk_folder FOREIGN KEY (pk_folder) REFERENCES public.folder(pk_folder); - - --- --- TOC entry 4092 (class 2606 OID 17230) --- Name: job c_job_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job - ADD CONSTRAINT c_job_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); - - --- --- TOC entry 4065 (class 2606 OID 17360) --- Name: job_post c_job_post_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_post - ADD CONSTRAINT c_job_post_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4066 (class 2606 OID 17365) --- Name: job_post c_job_post_pk_post_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_post - ADD CONSTRAINT c_job_post_pk_post_job FOREIGN KEY (pk_post_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4072 (class 2606 OID 17330) --- Name: job_resource c_job_resource_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_resource - ADD CONSTRAINT c_job_resource_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4073 (class 2606 OID 17325) --- Name: job_stat c_job_stat_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_stat - ADD CONSTRAINT c_job_stat_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4071 (class 2606 OID 17335) --- Name: job_usage c_job_usage_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.job_usage - ADD CONSTRAINT c_job_usage_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4085 (class 2606 OID 17265) --- Name: layer_env c_layer_env_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_env - ADD CONSTRAINT c_layer_env_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4086 (class 2606 OID 17260) --- Name: layer_env c_layer_env_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_env - ADD CONSTRAINT 
c_layer_env_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); - - --- --- TOC entry 4067 (class 2606 OID 17355) --- Name: layer_history c_layer_history_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_history - ADD CONSTRAINT c_layer_history_pk_job FOREIGN KEY (pk_job) REFERENCES public.job_history(pk_job) ON DELETE CASCADE; - - --- --- TOC entry 4060 (class 2606 OID 17385) --- Name: layer_mem c_layer_mem_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_mem - ADD CONSTRAINT c_layer_mem_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4061 (class 2606 OID 17390) --- Name: layer_mem c_layer_mem_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_mem - ADD CONSTRAINT c_layer_mem_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); - - --- --- TOC entry 4048 (class 2606 OID 17450) --- Name: layer_output c_layer_output_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_output - ADD CONSTRAINT c_layer_output_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4049 (class 2606 OID 17445) --- Name: layer_output c_layer_output_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_output - ADD CONSTRAINT c_layer_output_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); - - --- --- TOC entry 4087 (class 2606 OID 17255) --- Name: layer c_layer_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer - ADD CONSTRAINT c_layer_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4083 (class 2606 OID 17270) --- Name: layer_resource c_layer_resource_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_resource - ADD CONSTRAINT c_layer_resource_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4084 (class 2606 OID 17275) --- Name: layer_resource c_layer_resource_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_resource - ADD CONSTRAINT c_layer_resource_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); - - --- --- TOC entry 4081 (class 2606 OID 17280) --- Name: layer_stat c_layer_stat_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_stat - ADD CONSTRAINT c_layer_stat_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4082 (class 2606 OID 17285) --- Name: layer_stat c_layer_stat_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_stat - ADD CONSTRAINT c_layer_stat_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); - - --- --- TOC entry 4079 (class 2606 OID 17290) --- Name: layer_usage c_layer_usage_pk_job; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_usage - ADD CONSTRAINT c_layer_usage_pk_job FOREIGN KEY (pk_job) REFERENCES public.job(pk_job); - - --- --- TOC entry 4080 (class 2606 OID 17295) --- Name: layer_usage c_layer_usage_pk_layer; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.layer_usage - ADD CONSTRAINT c_layer_usage_pk_layer FOREIGN KEY (pk_layer) REFERENCES public.layer(pk_layer); - - --- --- TOC entry 4078 (class 2606 OID 17300) --- Name: matcher 
c_matcher_pk_filter; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.matcher - ADD CONSTRAINT c_matcher_pk_filter FOREIGN KEY (pk_filter) REFERENCES public.filter(pk_filter); - - --- --- TOC entry 4052 (class 2606 OID 17430) --- Name: owner c_owner_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.owner - ADD CONSTRAINT c_owner_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); - - --- --- TOC entry 4058 (class 2606 OID 17395) --- Name: point c_point_pk_dept; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.point - ADD CONSTRAINT c_point_pk_dept FOREIGN KEY (pk_dept) REFERENCES public.dept(pk_dept); - - --- --- TOC entry 4059 (class 2606 OID 17400) --- Name: point c_point_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.point - ADD CONSTRAINT c_point_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); - - --- --- TOC entry 4076 (class 2606 OID 17305) --- Name: proc c_proc_pk_frame; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.proc - ADD CONSTRAINT c_proc_pk_frame FOREIGN KEY (pk_frame) REFERENCES public.frame(pk_frame); - - --- --- TOC entry 4077 (class 2606 OID 17310) --- Name: proc c_proc_pk_host; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.proc - ADD CONSTRAINT c_proc_pk_host FOREIGN KEY (pk_host) REFERENCES public.host(pk_host); - - --- --- TOC entry 4064 (class 2606 OID 17370) --- Name: show_alias c_show_alias_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.show_alias - ADD CONSTRAINT c_show_alias_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); - - --- --- TOC entry 4050 (class 2606 OID 17440) --- Name: show_service c_show_service_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.show_service - ADD CONSTRAINT c_show_service_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); - - --- --- TOC entry 4074 (class 2606 OID 17315) --- Name: subscription c_subscription_pk_alloc; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.subscription - ADD CONSTRAINT c_subscription_pk_alloc FOREIGN KEY (pk_alloc) REFERENCES public.alloc(pk_alloc); - - --- --- TOC entry 4075 (class 2606 OID 17320) --- Name: subscription c_subscription_pk_show; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.subscription - ADD CONSTRAINT c_subscription_pk_show FOREIGN KEY (pk_show) REFERENCES public.show(pk_show); - - --- --- TOC entry 4057 (class 2606 OID 17405) --- Name: task c_task_pk_point; Type: FK CONSTRAINT; Schema: public; Owner: cuebot --- - -ALTER TABLE ONLY public.task - ADD CONSTRAINT c_task_pk_point FOREIGN KEY (pk_point) REFERENCES public.point(pk_point); - - --- Completed on 2025-09-04 18:51:09 UTC - --- --- PostgreSQL database dump complete --- diff --git a/rust/crates/scheduler/resources/seed_data.sql b/rust/crates/scheduler/resources/seed_data.sql new file mode 120000 index 000000000..8983e6003 --- /dev/null +++ b/rust/crates/scheduler/resources/seed_data.sql @@ -0,0 +1 @@ +../../../../cuebot/src/main/resources/conf/ddl/postgres/seed_data.sql \ No newline at end of file diff --git a/rust/crates/scheduler/src/cluster.rs b/rust/crates/scheduler/src/cluster.rs index f30591491..bf30a4159 100644 --- a/rust/crates/scheduler/src/cluster.rs +++ b/rust/crates/scheduler/src/cluster.rs @@ -291,6 
+291,27 @@ impl ClusterFeed { shows_filter: Option>, ) -> Result> { let cluster_dao = ClusterDao::new().await?; + Self::load_clusters_inner(cluster_dao, facility_id, ignore_tags, shows_filter).await + } + + /// Loads clusters using an externally provided connection pool. + /// Useful for testing with an embedded database. + pub async fn load_clusters_with_pool( + pool: Arc>, + facility_id: Option, + ignore_tags: &[String], + shows_filter: Option>, + ) -> Result> { + let cluster_dao = ClusterDao::with_pool(pool); + Self::load_clusters_inner(cluster_dao, facility_id, ignore_tags, shows_filter).await + } + + async fn load_clusters_inner( + cluster_dao: ClusterDao, + facility_id: Option, + ignore_tags: &[String], + shows_filter: Option>, + ) -> Result> { // Fetch clusters for alloc and non_alloc tags let mut clusters_stream = cluster_dao diff --git a/rust/crates/scheduler/src/dao/cluster_dao.rs b/rust/crates/scheduler/src/dao/cluster_dao.rs index a76479b3c..716b175ad 100644 --- a/rust/crates/scheduler/src/dao/cluster_dao.rs +++ b/rust/crates/scheduler/src/dao/cluster_dao.rs @@ -192,6 +192,12 @@ impl ClusterDao { }) } + pub fn with_pool(pool: Arc>) -> Self { + ClusterDao { + connection_pool: pool, + } + } + /// Fetches all allocation-based clusters from the database. /// /// Returns clusters defined by facility, show, and allocation tag combinations. diff --git a/rust/crates/scheduler/src/dao/host_dao.rs b/rust/crates/scheduler/src/dao/host_dao.rs index b517a340e..e075c80c6 100644 --- a/rust/crates/scheduler/src/dao/host_dao.rs +++ b/rust/crates/scheduler/src/dao/host_dao.rs @@ -247,6 +247,12 @@ impl HostDao { }) } + pub fn with_pool(pool: Arc>) -> Self { + HostDao { + connection_pool: pool, + } + } + /// Fetches hosts matching a specific show, facility, and tag. /// /// Finds all open hosts that belong to allocations subscribed to the given show diff --git a/rust/crates/scheduler/src/dao/job_dao.rs b/rust/crates/scheduler/src/dao/job_dao.rs index 73898b531..e1d3a4ccc 100644 --- a/rust/crates/scheduler/src/dao/job_dao.rs +++ b/rust/crates/scheduler/src/dao/job_dao.rs @@ -130,6 +130,12 @@ impl JobDao { }) } + pub fn with_pool(pool: Arc>) -> Self { + JobDao { + connection_pool: pool, + } + } + /// Queries for pending jobs by show, facility, and tag criteria. /// /// Finds jobs that are ready for dispatch based on subscription availability, diff --git a/rust/crates/scheduler/src/dao/layer_dao.rs b/rust/crates/scheduler/src/dao/layer_dao.rs index 62c007c9e..e09446550 100644 --- a/rust/crates/scheduler/src/dao/layer_dao.rs +++ b/rust/crates/scheduler/src/dao/layer_dao.rs @@ -296,6 +296,12 @@ impl LayerDao { }) } + pub fn with_pool(pool: Arc>) -> Self { + LayerDao { + connection_pool: pool, + } + } + /// Fetches layers with their frames in a single batched database query. /// /// Uses a single SQL query with joins to fetch both layers and their frames, diff --git a/rust/crates/scheduler/src/dao/proc_dao.rs b/rust/crates/scheduler/src/dao/proc_dao.rs index 0fb9d87ae..7ce355876 100644 --- a/rust/crates/scheduler/src/dao/proc_dao.rs +++ b/rust/crates/scheduler/src/dao/proc_dao.rs @@ -114,6 +114,12 @@ impl ProcDao { }) } + pub fn with_pool(pool: Arc>) -> Self { + ProcDao { + connection_pool: pool, + } + } + /// Inserts a new proc record into the database within an existing transaction. 
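The `with_pool` constructors added across the DAOs all follow the same dependency-injection shape: build one `Arc<Pool<Postgres>>` up front and hand clones of it to every DAO, instead of letting each DAO open its own pool from CONFIG. A minimal sketch of the intended wiring, assuming an illustrative connection URL (the tests get theirs from the embedded-db helper instead):

    use std::sync::Arc;
    use sqlx::postgres::PgPoolOptions;

    let pool = Arc::new(
        PgPoolOptions::new()
            .max_connections(5)
            .connect("postgres://postgres:password@127.0.0.1:5433/testdb") // illustrative URL
            .await?,
    );
    // Clones share the same underlying pool; each DAO just borrows capacity from it.
    let cluster_dao = ClusterDao::with_pool(pool.clone());
    let job_dao = JobDao::with_pool(pool.clone());
    let host_dao = HostDao::with_pool(pool.clone());

This keeps production behavior unchanged (the `new()` constructors still read CONFIG) while letting tests point every DAO at the same embedded database.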
/// /// Creates a database record representing the allocation of compute resources from a host diff --git a/rust/crates/scheduler/src/dao/resource_accounting_dao.rs b/rust/crates/scheduler/src/dao/resource_accounting_dao.rs index df8f25a23..1e9421744 100644 --- a/rust/crates/scheduler/src/dao/resource_accounting_dao.rs +++ b/rust/crates/scheduler/src/dao/resource_accounting_dao.rs @@ -242,6 +242,12 @@ impl ResourceAccountingDao { }) } + pub fn with_pool(pool: Arc>) -> Self { + ResourceAccountingDao { + connection_pool: pool, + } + } + pub async fn query_show_ids_by_names(&self, show_names: Vec) -> Result> { #[derive(sqlx::FromRow)] struct Row { diff --git a/rust/crates/scheduler/src/lib.rs b/rust/crates/scheduler/src/lib.rs index 7625acd4d..7a5464945 100644 --- a/rust/crates/scheduler/src/lib.rs +++ b/rust/crates/scheduler/src/lib.rs @@ -17,6 +17,7 @@ pub mod dao; pub mod host_cache; pub mod metrics; pub mod models; +pub mod orchestrator; pub mod pgpool; pub mod pipeline; pub mod resource_accounting; diff --git a/rust/crates/scheduler/src/orchestrator/dao.rs b/rust/crates/scheduler/src/orchestrator/dao.rs index 49fbae5b6..563804b4b 100644 --- a/rust/crates/scheduler/src/orchestrator/dao.rs +++ b/rust/crates/scheduler/src/orchestrator/dao.rs @@ -22,7 +22,6 @@ use uuid::Uuid; use crate::cluster::Cluster; use crate::config::CONFIG; -use crate::dao::helpers::parse_uuid; use crate::pgpool::connection_pool; /// Data Access Object for orchestrator tables (scheduler_instance and scheduler_cluster_assignment). @@ -31,12 +30,15 @@ pub struct OrchestratorDao { /// Dedicated connection for holding the session-level advisory lock. /// Lives outside the pool so the lock is retained as long as this connection is open. leader_conn: Mutex>, + /// Optional override for the connection URL used by the dedicated leader connection. + /// When set, `open_dedicated_connection` uses this instead of `CONFIG.database.connection_url()`. + connection_url: Option, } #[derive(sqlx::FromRow, Debug, Clone)] #[allow(dead_code)] pub struct InstanceRow { - pub pk_instance: String, + pub pk_instance: Uuid, pub str_name: String, pub str_facility: Option, pub int_capacity: i32, @@ -47,8 +49,8 @@ pub struct InstanceRow { #[derive(sqlx::FromRow, Debug, Clone)] #[allow(dead_code)] pub struct ClusterAssignmentRow { - pub pk_assignment: String, - pub pk_instance: String, + pub pk_assignment: Uuid, + pub pk_instance: Uuid, pub str_cluster_id: String, pub str_cluster_json: String, pub int_version: i32, @@ -128,13 +130,24 @@ impl OrchestratorDao { Ok(OrchestratorDao { connection_pool: pool, leader_conn: Mutex::new(None), + connection_url: None, }) } - async fn open_dedicated_connection() -> Result { - let options: PgConnectOptions = CONFIG - .database - .connection_url() + pub fn with_pool(pool: Arc>, connection_url: String) -> Self { + OrchestratorDao { + connection_pool: pool, + leader_conn: Mutex::new(None), + connection_url: Some(connection_url), + } + } + + async fn open_dedicated_connection(connection_url: &Option) -> Result { + let url = match connection_url { + Some(url) => url.clone(), + None => CONFIG.database.connection_url(), + }; + let options: PgConnectOptions = url .parse::()? 
.application_name("opencue-leader-lock"); options.connect().await @@ -150,7 +163,7 @@ impl OrchestratorDao { capacity: i32, ) -> Result<(), sqlx::Error> { sqlx::query(INSERT_INSTANCE) - .bind(instance_id.to_string()) + .bind(instance_id) .bind(name) .bind(facility) .bind(capacity) @@ -165,7 +178,7 @@ impl OrchestratorDao { jobs_queried: f64, ) -> Result<(), sqlx::Error> { let result = sqlx::query(UPDATE_HEARTBEAT) - .bind(instance_id.to_string()) + .bind(instance_id) .bind(jobs_queried) .execute(&*self.connection_pool) .await?; @@ -177,7 +190,7 @@ impl OrchestratorDao { pub async fn set_draining(&self, instance_id: Uuid) -> Result<(), sqlx::Error> { sqlx::query(SET_DRAINING) - .bind(instance_id.to_string()) + .bind(instance_id) .execute(&*self.connection_pool) .await?; Ok(()) @@ -185,7 +198,7 @@ impl OrchestratorDao { pub async fn delete_instance(&self, instance_id: Uuid) -> Result<(), sqlx::Error> { sqlx::query(DELETE_INSTANCE) - .bind(instance_id.to_string()) + .bind(instance_id) .execute(&*self.connection_pool) .await?; Ok(()) @@ -196,14 +209,11 @@ impl OrchestratorDao { failure_threshold: Duration, ) -> Result, sqlx::Error> { let interval = format!("{} seconds", failure_threshold.as_secs()); - let rows: Vec<(String,)> = sqlx::query_as(DELETE_DEAD_INSTANCES) + let rows: Vec<(Uuid,)> = sqlx::query_as(DELETE_DEAD_INSTANCES) .bind(interval) .fetch_all(&*self.connection_pool) .await?; - Ok(rows - .into_iter() - .map(|(id,)| crate::dao::helpers::parse_uuid(&id)) - .collect()) + Ok(rows.into_iter().map(|(id,)| id).collect()) } pub async fn get_live_instances( @@ -217,7 +227,7 @@ impl OrchestratorDao { .await?; Ok(rows .into_iter() - .map(|r| (parse_uuid(&r.pk_instance), r)) + .map(|r| (r.pk_instance, r)) .collect()) } @@ -228,7 +238,7 @@ impl OrchestratorDao { instance_id: Uuid, ) -> Result, sqlx::Error> { sqlx::query_as::<_, ClusterAssignmentRow>(QUERY_ASSIGNMENTS_FOR_INSTANCE) - .bind(instance_id.to_string()) + .bind(instance_id) .fetch_all(&*self.connection_pool) .await } @@ -243,7 +253,7 @@ impl OrchestratorDao { .await?; Ok(rows .into_iter() - .map(|r| (r.str_cluster_id, parse_uuid(&r.pk_instance))) + .map(|r| (r.str_cluster_id, r.pk_instance)) .collect()) } @@ -257,7 +267,7 @@ impl OrchestratorDao { let cluster_json = serde_json::to_string(cluster).expect("Failed to serialize Cluster to JSON"); sqlx::query(UPSERT_ASSIGNMENT) - .bind(instance_id.to_string()) + .bind(instance_id) .bind(&cluster.id) .bind(cluster_json) .execute(&*self.connection_pool) @@ -298,7 +308,7 @@ impl OrchestratorDao { } // Open a new dedicated connection and attempt to acquire the lock. 
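Keeping `leader_conn` outside the pool matters because `pg_try_advisory_lock` takes a session-level lock: it lives exactly as long as the session that acquired it, and a pooled connection can be handed to other tasks or closed when idle, which would either strand or silently release the lock. A stripped-down sketch of the same pattern using plain sqlx calls (the lock id is the constant the tests reuse below; error handling elided):

    use sqlx::{Connection, PgConnection};

    let mut conn = PgConnection::connect(&url).await?; // dedicated, never pooled
    let (acquired,): (bool,) = sqlx::query_as("SELECT pg_try_advisory_lock($1)")
        .bind(0x4F70656E437565_i64)
        .fetch_one(&mut conn)
        .await?;
    if acquired {
        // Leader work happens while `conn` stays open; closing it (or the
        // process dying) releases the lock, so failover needs no cleanup path.
        sqlx::query("SELECT pg_advisory_unlock($1)")
            .bind(0x4F70656E437565_i64)
            .execute(&mut conn)
            .await?;
    }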
- let mut conn = Self::open_dedicated_connection().await?; + let mut conn = Self::open_dedicated_connection(&self.connection_url).await?; let row: (bool,) = sqlx::query_as(TRY_ADVISORY_LOCK) .bind(lock_id) .fetch_one(&mut conn) diff --git a/rust/crates/scheduler/src/orchestrator/distributor.rs b/rust/crates/scheduler/src/orchestrator/distributor.rs index 730d02fe7..273b0c97b 100644 --- a/rust/crates/scheduler/src/orchestrator/distributor.rs +++ b/rust/crates/scheduler/src/orchestrator/distributor.rs @@ -450,7 +450,7 @@ mod tests { jobs_queried: f64, ) -> InstanceRow { InstanceRow { - pk_instance: id.to_string(), + pk_instance: id, str_name: format!("test:{}", id), str_facility: facility.map(String::from), int_capacity: capacity, diff --git a/rust/crates/scheduler/src/orchestrator/instance.rs b/rust/crates/scheduler/src/orchestrator/instance.rs index 3ae23cce6..223570806 100644 --- a/rust/crates/scheduler/src/orchestrator/instance.rs +++ b/rust/crates/scheduler/src/orchestrator/instance.rs @@ -65,6 +65,22 @@ impl InstanceManager { }) } + /// Creates an instance manager with an externally provided DAO. + /// Useful for testing with an embedded database. + pub fn with_dao(dao: Arc, facility: Option, capacity: i32) -> Self { + let instance_id = Uuid::new_v4(); + let hostname = gethostname::gethostname().to_string_lossy().to_string(); + let pid = std::process::id(); + let instance_name = format!("{}:{}", hostname, pid); + InstanceManager { + instance_id, + instance_name, + facility, + capacity, + dao, + } + } + /// Registers this instance in the scheduler_instance table. /// /// Inserts a row with the instance's ID, name, facility, and capacity so that @@ -108,9 +124,18 @@ impl InstanceManager { /// /// A `JoinHandle` for the spawned heartbeat task. pub fn start_heartbeat(&self, mut shutdown: watch::Receiver) -> JoinHandle<()> { + self.start_heartbeat_with_interval(CONFIG.orchestrator.heartbeat_interval, shutdown) + } + + /// Starts the heartbeat loop with an explicit interval. + /// Useful for testing with fast intervals without depending on CONFIG. + pub fn start_heartbeat_with_interval( + &self, + interval: std::time::Duration, + mut shutdown: watch::Receiver, + ) -> JoinHandle<()> { let instance_id = self.instance_id; let dao = self.dao.clone(); - let interval = CONFIG.orchestrator.heartbeat_interval; tokio::spawn(async move { let mut ticker = tokio::time::interval(interval); diff --git a/rust/crates/scheduler/src/orchestrator/mod.rs b/rust/crates/scheduler/src/orchestrator/mod.rs index 5b7e8dbd0..ca1951ac9 100644 --- a/rust/crates/scheduler/src/orchestrator/mod.rs +++ b/rust/crates/scheduler/src/orchestrator/mod.rs @@ -11,10 +11,10 @@ // the License. pub mod dao; -mod distributor; -mod instance; -mod leader; -mod sync; +pub mod distributor; +pub mod instance; +pub mod leader; +pub mod sync; use miette::Result; use tokio::sync::watch; diff --git a/rust/crates/scheduler/tests/embedded_db.rs b/rust/crates/scheduler/tests/embedded_db.rs new file mode 100644 index 000000000..09ca734c4 --- /dev/null +++ b/rust/crates/scheduler/tests/embedded_db.rs @@ -0,0 +1,178 @@ +// Copyright Contributors to the OpenCue Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
+
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::time::Duration;
+
+use pg_embed::pg_enums::PgAuthMethod;
+use pg_embed::pg_fetch::{PgFetchSettings, PG_V16};
+use pg_embed::postgres::{PgEmbed, PgSettings};
+use sqlx::postgres::PgPoolOptions;
+use sqlx::{Executor, Pool, Postgres};
+use tokio::sync::OnceCell;
+
+/// Shared embedded Postgres instance — started once, reused across all tests.
+static EMBEDDED_PG: OnceCell<PgEmbed> = OnceCell::const_new();
+
+/// Starts (or returns) the shared embedded Postgres process.
+async fn shared_pg() -> &'static PgEmbed {
+    EMBEDDED_PG
+        .get_or_init(|| async {
+            let db_dir = std::env::temp_dir()
+                .join(format!("pg_embed_test_{}", std::process::id()));
+            // Clean up any stale data from a previous crashed run
+            let _ = std::fs::remove_dir_all(&db_dir);
+
+            // Find an available port by binding to port 0
+            let port = {
+                let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
+                listener.local_addr().unwrap().port()
+            };
+
+            let pg_settings = PgSettings {
+                database_dir: db_dir,
+                port,
+                user: "postgres".to_string(),
+                password: "password".to_string(),
+                auth_method: PgAuthMethod::Plain,
+                persistent: false,
+                timeout: Some(Duration::from_secs(30)),
+                migration_dir: None,
+            };
+
+            let fetch_settings = PgFetchSettings {
+                version: PG_V16,
+                ..Default::default()
+            };
+
+            let mut pg = PgEmbed::new(pg_settings, fetch_settings)
+                .await
+                .expect("Failed to create PgEmbed instance");
+
+            pg.setup().await.expect("Failed to setup PgEmbed");
+            pg.start_db()
+                .await
+                .unwrap_or_else(|e| panic!("Failed to start embedded PG: {:?}", e));
+
+            pg
+        })
+        .await
+}
+
+/// Creates a fresh database with all migrations and seed data applied.
+///
+/// Each call creates a uniquely named database on the shared embedded Postgres
+/// instance, runs the Flyway-style migrations from the cuebot module, and loads
+/// the seed data. Returns a connection pool and the full database URI.
+#[allow(dead_code)]
+pub async fn create_test_db(test_name: &str) -> (Arc<Pool<Postgres>>, String) {
+    let pg = shared_pg().await;
+
+    // Unique database name per test invocation
+    let db_name = format!(
+        "test_{}_{}",
+        test_name,
+        uuid::Uuid::new_v4().as_simple()
+    );
+
+    pg.create_database(&db_name)
+        .await
+        .unwrap_or_else(|e| panic!("Failed to create database '{}': {}", db_name, e));
+
+    let url = pg.full_db_uri(&db_name);
+
+    let pool = PgPoolOptions::new()
+        .max_connections(5)
+        .acquire_timeout(Duration::from_secs(10))
+        .connect(&url)
+        .await
+        .unwrap_or_else(|e| panic!("Failed to connect to '{}': {}", db_name, e));
+
+    run_migrations(&pool).await;
+    load_seed_data(&pool).await;
+
+    (Arc::new(pool), url)
+}
+
+/// Runs Flyway-style V{N}__{description}.sql migrations in version order.
+async fn run_migrations(pool: &Pool<Postgres>) {
+    let migrations_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("resources/migrations");
+
+    let mut entries: Vec<_> = std::fs::read_dir(&migrations_dir)
+        .unwrap_or_else(|e| panic!("Cannot read migrations dir {:?}: {}", migrations_dir, e))
+        .filter_map(|e| e.ok())
+        .filter(|e| {
+            e.path()
+                .extension()
+                .map_or(false, |ext| ext == "sql")
+        })
+        .collect();
+
+    // Sort by version number extracted from V{N}__... filename
+    entries.sort_by_key(|e| {
+        let name = e.file_name().to_string_lossy().to_string();
+        let version: u32 = name
+            .trim_start_matches('V')
+            .split("__")
+            .next()
+            .expect("Migration file must have V{N}__ prefix")
+            .parse()
+            .expect("Version number must be a valid u32");
+        version
+    });
+
+    for entry in entries {
+        let sql = std::fs::read_to_string(entry.path()).unwrap_or_else(|e| {
+            panic!(
+                "Failed to read migration file {:?}: {}",
+                entry.path(),
+                e
+            )
+        });
+        pool.execute(sql.as_str()).await.unwrap_or_else(|e| {
+            panic!(
+                "Migration {:?} failed: {}",
+                entry.file_name(),
+                e
+            )
+        });
+    }
+}
+
+/// Loads the seed data (shows, departments, facilities, allocations, subscriptions).
+///
+/// Some seed data rows may already exist from migrations (e.g. task_lock entries added
+/// by V35 and V38 that are also present in seed_data.sql for backwards compatibility).
+/// Each statement is executed individually so that duplicate-key conflicts on specific
+/// rows don't prevent the rest of the seed data from loading.
+async fn load_seed_data(pool: &Pool<Postgres>) {
+    let seed_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("resources/seed_data.sql");
+    let sql = std::fs::read_to_string(&seed_path)
+        .unwrap_or_else(|e| panic!("Failed to read seed data {:?}: {}", seed_path, e));
+
+    // Split on semicolons and execute each statement individually,
+    // ignoring duplicate key violations (error code 23505).
+    for statement in sql.split(';') {
+        let trimmed = statement.trim();
+        if trimmed.is_empty() {
+            continue;
+        }
+        match pool.execute(trimmed).await {
+            Ok(_) => {}
+            Err(sqlx::Error::Database(db_err))
+                if db_err.code().as_deref() == Some("23505") =>
+            {
+                // Duplicate key — row already inserted by a migration, skip.
+            }
+            Err(e) => panic!("Seed data statement failed: {}\nSQL: {}", e, trimmed),
+        }
+    }
+}
diff --git a/rust/crates/scheduler/tests/integration_tests.rs b/rust/crates/scheduler/tests/integration_tests.rs
new file mode 100644
index 000000000..9224bf829
--- /dev/null
+++ b/rust/crates/scheduler/tests/integration_tests.rs
@@ -0,0 +1,263 @@
+// Copyright Contributors to the OpenCue Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under
+// the License.
+ +mod embedded_db; + +#[cfg(all(test, feature = "integration-tests"))] +mod orchestration_tests { + use std::sync::Arc; + use std::time::Duration; + + use scheduler::orchestrator::dao::OrchestratorDao; + use scheduler::orchestrator::instance::InstanceManager; + use scheduler::orchestrator::leader::LeaderElection; + use serial_test::serial; + use tokio::sync::watch; + + use crate::embedded_db::create_test_db; + + /// Well-known advisory lock ID for the orchestrator leader (same as production). + const ORCHESTRATOR_LOCK_ID: i64 = 0x4F70656E437565; + + // ── Test 1: Instance registration and heartbeat ────────────────────── + + #[tokio::test] + async fn test_instance_register_and_heartbeat() { + let (pool, url) = create_test_db("register").await; + + let dao = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + let mgr = InstanceManager::with_dao(dao.clone(), None, 100); + mgr.register().await.expect("register should succeed"); + + // Verify the instance is visible + let instances = dao + .get_live_instances(Duration::from_secs(30)) + .await + .expect("get_live_instances should succeed"); + assert_eq!(instances.len(), 1); + assert!(instances.contains_key(&mgr.instance_id)); + + // Update heartbeat with a counter value + dao.update_heartbeat(mgr.instance_id, 42.0) + .await + .expect("update_heartbeat should succeed"); + + let instances = dao + .get_live_instances(Duration::from_secs(30)) + .await + .expect("get_live_instances should succeed"); + let row = &instances[&mgr.instance_id]; + assert!((row.float_jobs_queried - 42.0).abs() < f64::EPSILON); + } + + // ── Test 2: Leader election with advisory locks ────────────────────── + + #[tokio::test] + #[serial] + async fn test_leader_election_advisory_locks() { + let (pool, url) = create_test_db("leader").await; + + // Two DAOs, each with their own dedicated leader connection + let dao1 = Arc::new(OrchestratorDao::with_pool(pool.clone(), url.clone())); + let dao2 = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + + // Instance 1 acquires the lock + let acquired = dao1 + .try_acquire_leader_lock(ORCHESTRATOR_LOCK_ID) + .await + .expect("lock attempt should not error"); + assert!(acquired, "first instance should acquire the lock"); + + // Instance 2 cannot acquire it + let acquired = dao2 + .try_acquire_leader_lock(ORCHESTRATOR_LOCK_ID) + .await + .expect("lock attempt should not error"); + assert!(!acquired, "second instance should NOT acquire the lock"); + + // Instance 1 releases the lock + dao1.release_leader_lock().await; + + // Now instance 2 can acquire it + let acquired = dao2 + .try_acquire_leader_lock(ORCHESTRATOR_LOCK_ID) + .await + .expect("lock attempt should not error"); + assert!(acquired, "second instance should acquire the lock after release"); + + dao2.release_leader_lock().await; + } + + // ── Test 3: Instance deregistration ────────────────────────────────── + + #[tokio::test] + async fn test_instance_deregistration() { + let (pool, url) = create_test_db("deregister").await; + + let dao = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + let mgr = InstanceManager::with_dao(dao.clone(), Some("test-facility".to_string()), 100); + mgr.register().await.expect("register should succeed"); + + // Verify it exists + let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap(); + assert_eq!(instances.len(), 1); + + // Graceful shutdown removes it + mgr.shutdown().await; + + let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap(); + assert_eq!(instances.len(), 0); + } + + 
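Worth noting for anyone running these locally: the suite is gated behind the `integration-tests` feature, and the leader-election tests are additionally marked `#[serial]`, presumably to keep the timing-sensitive election loops from contending for resources on the shared embedded server. Assuming the crate's package name matches its `scheduler` lib target, something like:

    cargo test -p scheduler --features integration-tests

should run them; the first invocation will also download the PostgreSQL 16 binaries that pg_embed fetches during setup.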
// ── Test 4: Multiple instances with different capacities ───────────── + + #[tokio::test] + async fn test_multiple_instances() { + let (pool, url) = create_test_db("multi_instance").await; + + let dao = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + + let mgr1 = InstanceManager::with_dao(dao.clone(), None, 100); + let mgr2 = InstanceManager::with_dao(dao.clone(), None, 200); + let mgr3 = InstanceManager::with_dao(dao.clone(), Some("facility-a".to_string()), 50); + + mgr1.register().await.unwrap(); + mgr2.register().await.unwrap(); + mgr3.register().await.unwrap(); + + let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap(); + assert_eq!(instances.len(), 3); + + // Verify capacities + assert_eq!(instances[&mgr1.instance_id].int_capacity, 100); + assert_eq!(instances[&mgr2.instance_id].int_capacity, 200); + assert_eq!(instances[&mgr3.instance_id].int_capacity, 50); + + // Remove one + mgr2.shutdown().await; + let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap(); + assert_eq!(instances.len(), 2); + assert!(!instances.contains_key(&mgr2.instance_id)); + } + + // ── Test 5: Leader election loop (live async) ──────────────────────── + + #[tokio::test] + #[serial] + async fn test_leader_election_loop() { + let (pool, url) = create_test_db("election_loop").await; + + let dao1 = Arc::new(OrchestratorDao::with_pool(pool.clone(), url.clone())); + let dao2 = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + + let election1 = LeaderElection::new(dao1.clone()); + let election2 = LeaderElection::new(dao2.clone()); + + let (_shutdown_tx, shutdown_rx) = watch::channel(false); + + let handle1 = election1.start(vec![], shutdown_rx.clone()); + let handle2 = election2.start(vec![], shutdown_rx.clone()); + + // Wait for election to settle — with CONFIG defaults or fast overrides + tokio::time::sleep(Duration::from_secs(15)).await; + + // Exactly one should be leader + let leader1 = election1.is_leader(); + let leader2 = election2.is_leader(); + assert!( + leader1 ^ leader2, + "Exactly one instance should be leader, got: election1={}, election2={}", + leader1, leader2 + ); + + // Shutdown + let _ = _shutdown_tx.send(true); + let _ = tokio::time::timeout(Duration::from_secs(5), handle1).await; + let _ = tokio::time::timeout(Duration::from_secs(5), handle2).await; + } + + // ── Test 6: Cluster assignment round-trip ──────────────────────────── + + #[tokio::test] + async fn test_cluster_assignment_round_trip() { + use scheduler::cluster::Cluster; + use scheduler::cluster_key::{Tag, TagType}; + + let (pool, url) = create_test_db("assignments").await; + + let dao = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + + // Register an instance + let mgr = InstanceManager::with_dao(dao.clone(), None, 100); + mgr.register().await.unwrap(); + + // Create a test cluster + let cluster = Cluster::single_tag( + uuid::Uuid::new_v4(), + uuid::Uuid::new_v4(), + Tag { + name: "general".to_string(), + ttype: TagType::Alloc, + }, + ); + + // Assign cluster to instance + dao.upsert_assignment(mgr.instance_id, &cluster) + .await + .expect("upsert_assignment should succeed"); + + // Read back assignments + let assignments = dao + .get_assignments_for_instance(mgr.instance_id) + .await + .expect("get_assignments should succeed"); + assert_eq!(assignments.len(), 1); + assert_eq!(assignments[0].str_cluster_id, cluster.id); + + // Read all assignments + let all = dao.get_all_assignments().await.unwrap(); + assert_eq!(all.len(), 1); + 
assert_eq!(all[&cluster.id], mgr.instance_id);
+
+        // Delete assignment
+        dao.delete_assignment_by_cluster_id(&cluster.id)
+            .await
+            .unwrap();
+        let all = dao.get_all_assignments().await.unwrap();
+        assert!(all.is_empty());
+    }
+
+    // ── Test 7: Dead instance cleanup ────────────────────────────────────
+
+    #[tokio::test]
+    async fn test_dead_instance_cleanup() {
+        let (pool, url) = create_test_db("dead_cleanup").await;
+        let dao = Arc::new(OrchestratorDao::with_pool(pool.clone(), url));
+
+        // Register instance
+        let mgr = InstanceManager::with_dao(dao.clone(), None, 100);
+        mgr.register().await.unwrap();
+
+        // With a 30s threshold, the just-registered instance should be alive
+        let dead = dao.delete_dead_instances(Duration::from_secs(30)).await.unwrap();
+        assert!(dead.is_empty());
+
+        // With a 0s threshold, everything is "dead"
+        let dead = dao.delete_dead_instances(Duration::from_secs(0)).await.unwrap();
+        assert_eq!(dead.len(), 1);
+        assert_eq!(dead[0], mgr.instance_id);
+
+        // Instance should be gone now
+        let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap();
+        assert!(instances.is_empty());
+    }
+}

From 6c075cbc2df168b1f0cdfe57c3b0c1536165b7d5 Mon Sep 17 00:00:00 2001
From: Diego Tavares
Date: Thu, 9 Apr 2026 15:38:13 -0700
Subject: [PATCH 09/16] Add more integration tests to orchestrator

---
 .../scheduler/src/orchestrator/distributor.rs |   2 +-
 .../scheduler/tests/integration_tests.rs      | 326 +++++++++++++++++-
 2 files changed, 326 insertions(+), 2 deletions(-)

diff --git a/rust/crates/scheduler/src/orchestrator/distributor.rs b/rust/crates/scheduler/src/orchestrator/distributor.rs
index 273b0c97b..889787754 100644
--- a/rust/crates/scheduler/src/orchestrator/distributor.rs
+++ b/rust/crates/scheduler/src/orchestrator/distributor.rs
@@ -255,7 +255,7 @@ impl Distributor {
     /// Strategy:
     /// 1. Preserve stable assignments (cluster stays on same live instance).
     /// 2. Assign unassigned clusters to the instance with the lowest load ratio.
-    fn compute_assignments(
+    pub fn compute_assignments(
         all_clusters: &[Cluster],
         rated_instances: &HashMap<Uuid, (InstanceRow, f64)>,
         active_assignments: &HashMap<Uuid, Uuid>,
diff --git a/rust/crates/scheduler/tests/integration_tests.rs b/rust/crates/scheduler/tests/integration_tests.rs
index 9224bf829..8a24aef1c 100644
--- a/rust/crates/scheduler/tests/integration_tests.rs
+++ b/rust/crates/scheduler/tests/integration_tests.rs
@@ -14,17 +14,51 @@
 mod embedded_db;
 
 #[cfg(all(test, feature = "integration-tests"))]
 mod orchestration_tests {
+    use std::collections::HashMap;
     use std::sync::Arc;
     use std::time::Duration;
 
-    use scheduler::orchestrator::dao::OrchestratorDao;
+    use scheduler::cluster::Cluster;
+    use scheduler::cluster_key::{Tag, TagType};
+    use scheduler::orchestrator::dao::{InstanceRow, OrchestratorDao};
+    use scheduler::orchestrator::distributor::Distributor;
     use scheduler::orchestrator::instance::InstanceManager;
     use scheduler::orchestrator::leader::LeaderElection;
     use serial_test::serial;
     use tokio::sync::watch;
+    use uuid::Uuid;
 
     use crate::embedded_db::create_test_db;
 
+    /// Creates `count` test clusters sharing the same facility and show.
+    fn make_test_clusters(count: usize) -> (Uuid, Uuid, Vec<Cluster>) {
+        let facility = Uuid::new_v4();
+        let show = Uuid::new_v4();
+        let clusters = (0..count)
+            .map(|i| {
+                Cluster::single_tag(
+                    facility,
+                    show,
+                    Tag {
+                        name: format!("tag{}", i),
+                        ttype: TagType::Alloc,
+                    },
+                )
+            })
+            .collect();
+        (facility, show, clusters)
+    }
+
+    /// Wraps DAO instance rows into rated instances with rate 0.0 (bootstrap).
+    fn rated_instances_from_dao(
+        instances: HashMap<Uuid, InstanceRow>,
+    ) -> HashMap<Uuid, (InstanceRow, f64)> {
+        instances
+            .into_iter()
+            .map(|(id, row)| (id, (row, 0.0)))
+            .collect()
+    }
+
     /// Well-known advisory lock ID for the orchestrator leader (same as production).
     const ORCHESTRATOR_LOCK_ID: i64 = 0x4F70656E437565;
 
@@ -260,4 +294,294 @@ mod orchestration_tests {
         let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap();
         assert!(instances.is_empty());
     }
+
+    // ── Test 8: Cluster redistribution when instance goes offline ────────
+
+    #[tokio::test]
+    async fn test_cluster_redistribution_on_instance_offline() {
+        let (pool, url) = create_test_db("redistribution").await;
+        let dao = Arc::new(OrchestratorDao::with_pool(pool.clone(), url));
+
+        // Register 3 instances
+        let mgr_a = InstanceManager::with_dao(dao.clone(), None, 100);
+        let mgr_b = InstanceManager::with_dao(dao.clone(), None, 100);
+        let mgr_c = InstanceManager::with_dao(dao.clone(), None, 100);
+        mgr_a.register().await.unwrap();
+        mgr_b.register().await.unwrap();
+        mgr_c.register().await.unwrap();
+
+        // Create 6 clusters and compute initial assignments (2 per instance)
+        let (_fac, _show, clusters) = make_test_clusters(6);
+        let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap();
+        let rated = rated_instances_from_dao(instances);
+        let initial_assignments =
+            Distributor::compute_assignments(&clusters, &rated, &HashMap::new());
+
+        assert_eq!(initial_assignments.len(), 6);
+        let count_a = initial_assignments.values().filter(|&&v| v == mgr_a.instance_id).count();
+        let count_b = initial_assignments.values().filter(|&&v| v == mgr_b.instance_id).count();
+        let count_c = initial_assignments.values().filter(|&&v| v == mgr_c.instance_id).count();
+        assert_eq!(count_a, 2);
+        assert_eq!(count_b, 2);
+        assert_eq!(count_c, 2);
+
+        // Write assignments to DB
+        for cluster in &clusters {
+            let inst = initial_assignments[&cluster.id];
+            dao.upsert_assignment(inst, cluster).await.unwrap();
+        }
+        assert_eq!(dao.get_all_assignments().await.unwrap().len(), 6);
+
+        // Simulate instance B going offline (cascade deletes its assignments)
+        mgr_b.shutdown().await;
+
+        // Verify: only 4 assignments remain (B's 2 were cascade-deleted)
+        let remaining = dao.get_all_assignments().await.unwrap();
+        assert_eq!(remaining.len(), 4, "cascade delete should remove B's assignments");
+        assert!(
+            !remaining.values().any(|&v| v == mgr_b.instance_id),
+            "no assignments should reference the dead instance"
+        );
+
+        // Re-run distribution with surviving instances
+        let surviving = dao.get_live_instances(Duration::from_secs(30)).await.unwrap();
+        assert_eq!(surviving.len(), 2);
+        let rated_surviving = rated_instances_from_dao(surviving);
+        let new_assignments =
+            Distributor::compute_assignments(&clusters, &rated_surviving, &remaining);
+
+        // All 6 clusters should now be assigned to the 2 surviving instances
+        assert_eq!(new_assignments.len(), 6);
+        assert!(
+            !new_assignments.values().any(|&v| v == mgr_b.instance_id),
+            "dead instance should not receive assignments"
+        );
+
+        // The 4 stable assignments should remain on their original instances
+        for (cluster_id, &original_inst) in &remaining {
+            assert_eq!(
+                new_assignments[cluster_id], original_inst,
+                "stable assignment for {} should not move",
+                cluster_id
+            );
+        }
+
+        // Apply to DB and verify
+        for cluster in &clusters {
+            let inst = new_assignments[&cluster.id];
+            dao.upsert_assignment(inst, cluster).await.unwrap();
+        }
+        let final_assignments = dao.get_all_assignments().await.unwrap();
+
assert_eq!(final_assignments.len(), 6); + } + + // ── Test 9: Leader reelection when leader goes offline ────────────── + + #[tokio::test] + #[serial] + async fn test_leader_reelection_on_leader_offline() { + let (pool, url) = create_test_db("reelection").await; + + let dao1 = Arc::new(OrchestratorDao::with_pool(pool.clone(), url.clone())); + let dao2 = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + + let election1 = LeaderElection::new(dao1.clone()); + let election2 = LeaderElection::new(dao2.clone()); + + let (shutdown_tx, shutdown_rx) = watch::channel(false); + + let handle1 = election1.start(vec![], shutdown_rx.clone()); + let handle2 = election2.start(vec![], shutdown_rx.clone()); + + // Wait for initial election to settle + tokio::time::sleep(Duration::from_secs(15)).await; + + // Exactly one should be leader + let leader1 = election1.is_leader(); + let leader2 = election2.is_leader(); + assert!( + leader1 ^ leader2, + "Exactly one should be leader: e1={}, e2={}", + leader1, leader2 + ); + + // Identify who is leader and simulate their crash by releasing the lock + let (leader_dao, standby_election) = if leader1 { + (&dao1, &election2) + } else { + (&dao2, &election1) + }; + leader_dao.release_leader_lock().await; + + // Wait for the standby to detect the free lock and acquire it + tokio::time::sleep(Duration::from_secs(15)).await; + + assert!( + standby_election.is_leader(), + "the standby instance should have become leader after the original leader went offline" + ); + + // Shutdown + let _ = shutdown_tx.send(true); + let _ = tokio::time::timeout(Duration::from_secs(5), handle1).await; + let _ = tokio::time::timeout(Duration::from_secs(5), handle2).await; + } + + // ── Test 10: Load rebalance when a new instance joins ─────────────── + + #[tokio::test] + async fn test_load_rebalance_on_new_instance_join() { + let (pool, url) = create_test_db("rebalance_join").await; + let dao = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + + // Start with a single instance holding all clusters + let mgr_a = InstanceManager::with_dao(dao.clone(), None, 100); + mgr_a.register().await.unwrap(); + + let (_fac, _show, clusters) = make_test_clusters(6); + + // Assign all 6 clusters to instance A + for cluster in &clusters { + dao.upsert_assignment(mgr_a.instance_id, cluster).await.unwrap(); + } + let active_assignments = dao.get_all_assignments().await.unwrap(); + assert_eq!(active_assignments.len(), 6); + assert!(active_assignments.values().all(|&v| v == mgr_a.instance_id)); + + // New instance B joins the cluster + let mgr_b = InstanceManager::with_dao(dao.clone(), None, 100); + mgr_b.register().await.unwrap(); + + let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap(); + assert_eq!(instances.len(), 2); + let rated = rated_instances_from_dao(instances); + + // With active assignments, stability preserves all on A (no migration) + let stable_assignments = + Distributor::compute_assignments(&clusters, &rated, &active_assignments); + assert_eq!(stable_assignments.len(), 6); + let all_on_a = stable_assignments.values().all(|&v| v == mgr_a.instance_id); + assert!( + all_on_a, + "with active (non-expired) assignments, stability should keep all clusters on A" + ); + + // Simulate TTL expiration: pass empty active_assignments (all expired) + let after_ttl = + Distributor::compute_assignments(&clusters, &rated, &HashMap::new()); + assert_eq!(after_ttl.len(), 6); + + let count_a = after_ttl.values().filter(|&&v| v == mgr_a.instance_id).count(); + let count_b = 
after_ttl.values().filter(|&&v| v == mgr_b.instance_id).count(); + assert_eq!( + count_a, 3, + "after TTL expiration, clusters should be evenly distributed (A)" + ); + assert_eq!( + count_b, 3, + "after TTL expiration, clusters should be evenly distributed (B)" + ); + + // Apply the rebalanced assignments to DB + for cluster in &clusters { + let inst = after_ttl[&cluster.id]; + dao.upsert_assignment(inst, cluster).await.unwrap(); + } + let final_assignments = dao.get_all_assignments().await.unwrap(); + assert_eq!(final_assignments.len(), 6); + let final_a = final_assignments.values().filter(|&&v| v == mgr_a.instance_id).count(); + let final_b = final_assignments.values().filter(|&&v| v == mgr_b.instance_id).count(); + assert_eq!(final_a, 3); + assert_eq!(final_b, 3); + } + + // ── Test 11: Fresh start after all nodes become offline ───────────── + + #[tokio::test] + #[serial] + async fn test_fresh_start_after_total_failure() { + let (pool, url) = create_test_db("fresh_start").await; + let dao = Arc::new(OrchestratorDao::with_pool(pool.clone(), url.clone())); + + // Set up initial cluster: 2 instances with 6 clusters distributed + let mgr_a = InstanceManager::with_dao(dao.clone(), None, 100); + let mgr_b = InstanceManager::with_dao(dao.clone(), None, 100); + mgr_a.register().await.unwrap(); + mgr_b.register().await.unwrap(); + + let (_fac, _show, clusters) = make_test_clusters(6); + let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap(); + let rated = rated_instances_from_dao(instances); + let initial_assignments = + Distributor::compute_assignments(&clusters, &rated, &HashMap::new()); + + for cluster in &clusters { + let inst = initial_assignments[&cluster.id]; + dao.upsert_assignment(inst, cluster).await.unwrap(); + } + assert_eq!(dao.get_all_assignments().await.unwrap().len(), 6); + + // Leader acquires the advisory lock + let acquired = dao + .try_acquire_leader_lock(ORCHESTRATOR_LOCK_ID) + .await + .expect("lock attempt should not error"); + assert!(acquired, "should acquire leader lock"); + + // ── Simulate total failure ── + // Leader connection drops + dao.release_leader_lock().await; + assert!(!dao.is_leader_lock_held().await, "lock should be released"); + + // All instances die (0s threshold = everything is dead) + let dead = dao.delete_dead_instances(Duration::from_secs(0)).await.unwrap(); + assert_eq!(dead.len(), 2, "both instances should be removed"); + + // Verify total wipeout + let live = dao.get_live_instances(Duration::from_secs(30)).await.unwrap(); + assert!(live.is_empty(), "no instances should be alive"); + let assignments = dao.get_all_assignments().await.unwrap(); + assert!(assignments.is_empty(), "cascade delete should remove all assignments"); + + // ── Fresh start: new instances come online ── + let dao_new = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + + // New leader can acquire the lock (it was freed when old leader's connection closed) + let acquired = dao_new + .try_acquire_leader_lock(ORCHESTRATOR_LOCK_ID) + .await + .expect("lock attempt should not error"); + assert!(acquired, "new instance should acquire the leader lock after total failure"); + + // Register fresh instances + let mgr_c = InstanceManager::with_dao(dao_new.clone(), None, 100); + let mgr_d = InstanceManager::with_dao(dao_new.clone(), None, 100); + mgr_c.register().await.unwrap(); + mgr_d.register().await.unwrap(); + + let new_instances = dao_new.get_live_instances(Duration::from_secs(30)).await.unwrap(); + assert_eq!(new_instances.len(), 2); + + // Compute 
fresh assignments from scratch (no prior state) + let new_rated = rated_instances_from_dao(new_instances); + let fresh_assignments = + Distributor::compute_assignments(&clusters, &new_rated, &HashMap::new()); + + assert_eq!(fresh_assignments.len(), 6); + let count_c = fresh_assignments.values().filter(|&&v| v == mgr_c.instance_id).count(); + let count_d = fresh_assignments.values().filter(|&&v| v == mgr_d.instance_id).count(); + assert_eq!(count_c, 3, "fresh distribution should be even (C)"); + assert_eq!(count_d, 3, "fresh distribution should be even (D)"); + + // Apply to DB + for cluster in &clusters { + let inst = fresh_assignments[&cluster.id]; + dao_new.upsert_assignment(inst, cluster).await.unwrap(); + } + let final_assignments = dao_new.get_all_assignments().await.unwrap(); + assert_eq!(final_assignments.len(), 6); + + // Cleanup + dao_new.release_leader_lock().await; + } } From f61e7262b4bcc65c652ea59b49c591fc6c0b40a2 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Fri, 10 Apr 2026 10:04:47 -0700 Subject: [PATCH 10/16] Add orchestrator examples to scheduler config --- rust/config/scheduler.yaml | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/rust/config/scheduler.yaml b/rust/config/scheduler.yaml index 95b87a05a..0170de18f 100644 --- a/rust/config/scheduler.yaml +++ b/rust/config/scheduler.yaml @@ -241,3 +241,42 @@ scheduler: # ignore_tags: # - tag_to_ignore1 # - tag_to_ignore2 + +# ============================================================================= +# ORCHESTRATOR CONFIGURATION +# ============================================================================= +# Controls leader election, heartbeating, and cluster distribution for +# multi-instance deployments. All durations use humantime format (e.g. 5s, 30s). +# orchestrator: + # How often this instance updates its heartbeat + # Default: 5s + # heartbeat_interval: 5s + + # Instance is considered dead after this duration without heartbeat + # Default: 30s + # failure_threshold: 30s + + # How often the leader recalculates cluster distribution + # Default: 10s + # distribution_interval: 10s + + # How often workers poll for assignment changes + # Default: 5s + # poll_interval: 5s + + # How often non-leaders attempt to acquire the leader lock + # Default: 10s + # election_interval: 10s + + # Relative capacity weight of this instance (higher = more clusters assigned) + # Default: 100 + # capacity: 100 + + # Graceful shutdown timeout before force-killing in-flight work + # Default: 30s + # shutdown_timeout: 30s + + # How long a cluster assignment is preserved before becoming eligible for + # redistribution. Prevents new instances from remaining idle. 
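+  # For example, with assignment_ttl: 120s and distribution_interval: 10s, a
+  # cluster assigned at t=0 keeps its instance until t=120s, after which the
+  # next leader pass may move it to a newer, less-loaded instance.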
+  # Default: 120s
+  # assignment_ttl: 120s

From f28bbb13176d48e7dca123943fa051c2ad4fc7f5 Mon Sep 17 00:00:00 2001
From: Diego Tavares
Date: Fri, 10 Apr 2026 10:50:19 -0700
Subject: [PATCH 11/16] Fix bug trying to match facility code with id

This bug prevented scoped instances from matching any clusters
---
 rust/crates/scheduler/src/orchestrator/dao.rs |  10 +-
 .../scheduler/src/orchestrator/distributor.rs | 227 +++++++++++++++++-
 2 files changed, 221 insertions(+), 16 deletions(-)

diff --git a/rust/crates/scheduler/src/orchestrator/dao.rs b/rust/crates/scheduler/src/orchestrator/dao.rs
index 563804b4b..45ee5865f 100644
--- a/rust/crates/scheduler/src/orchestrator/dao.rs
+++ b/rust/crates/scheduler/src/orchestrator/dao.rs
@@ -40,6 +40,7 @@ pub struct OrchestratorDao {
 pub struct InstanceRow {
     pub pk_instance: Uuid,
     pub str_name: String,
+    pub str_facility_id: Option<Uuid>,
     pub str_facility: Option<String>,
     pub int_capacity: i32,
     pub float_jobs_queried: f64,
@@ -87,10 +88,11 @@ RETURNING pk_instance
 "#;
 
 static QUERY_LIVE_INSTANCES: &str = r#"
-SELECT pk_instance, str_name, str_facility, int_capacity, float_jobs_queried, b_draining
-FROM scheduler_instance
-WHERE ts_heartbeat >= NOW() - $1::interval
-  AND b_draining = FALSE
+SELECT si.pk_instance, si.str_name, f.pk_facility AS str_facility_id, si.str_facility, si.int_capacity, si.float_jobs_queried, si.b_draining
+FROM scheduler_instance si
+LEFT JOIN facility f ON LOWER(f.str_name) = LOWER(si.str_facility)
+WHERE si.ts_heartbeat >= NOW() - $1::interval
+  AND si.b_draining = FALSE
 "#;
 
 // --- Cluster assignment queries ---
diff --git a/rust/crates/scheduler/src/orchestrator/distributor.rs b/rust/crates/scheduler/src/orchestrator/distributor.rs
index 889787754..0e4eccc00 100644
--- a/rust/crates/scheduler/src/orchestrator/distributor.rs
+++ b/rust/crates/scheduler/src/orchestrator/distributor.rs
@@ -31,6 +31,15 @@ struct RateSnapshot {
     timestamp: Instant,
 }
 
+/// Resolved facility binding for an instance.
+/// Captures both the configured facility name and its resolved UUID from the facility table.
+struct InstanceFacility {
+    /// Facility name as configured on the instance (e.g. "spi"). `None` means unscoped.
+    name: Option<String>,
+    /// UUID resolved via the facility table. `None` when unscoped or when the name didn't resolve.
+    id: Option<Uuid>,
+}
+
 /// The distributor runs on the leader instance. It loads all clusters from the database,
 /// reads live instances, computes load rates, and assigns clusters to instances.
 pub struct Distributor {
@@ -277,9 +286,14 @@ impl Distributor {
         let all_rates_zero = rated_instances.values().all(|(_, rate)| *rate == 0.0);
 
         // Build facility map for affinity filtering
-        let instance_facilities: HashMap<Uuid, Option<String>> = rated_instances
+        let instance_facilities: HashMap<Uuid, InstanceFacility> = rated_instances
             .iter()
-            .map(|(&id, (inst, _))| (id, inst.str_facility.clone()))
+            .map(|(&id, (inst, _))| {
+                (id, InstanceFacility {
+                    name: inst.str_facility.clone(),
+                    id: inst.str_facility_id,
+                })
+            })
             .collect();
 
         // First pass: preserve stable assignments where the instance is still alive
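// The bug in a nutshell: the old check compared the cluster's facility UUID,
// rendered as text, against the instance's configured facility *name*, e.g.
//
//   "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1" == "spi"   // never true
//
// so facility-scoped instances could never match any cluster. Resolving the
// name to its UUID up front (via the LEFT JOIN above) makes the comparison
// meaningful; the hunk below switches the check to a straight UUID equality.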
@@ -362,13 +376,21 @@
     /// Checks whether a cluster is eligible to run on a given instance based on facility.
     fn is_facility_eligible(
         cluster: &Cluster,
-        instance_facilities: &HashMap<Uuid, Option<String>>,
+        instance_facilities: &HashMap<Uuid, InstanceFacility>,
         instance_id: Uuid,
     ) -> bool {
         match instance_facilities.get(&instance_id) {
-            Some(Some(facility)) => {
-                // Instance is scoped to a facility — cluster must match
-                cluster.facility_id.to_string().to_lowercase() == facility.to_lowercase()
+            Some(InstanceFacility { name: Some(_), id: Some(facility_id) }) => {
+                // Instance is scoped to a resolved facility — cluster must match
+                cluster.facility_id == *facility_id
+            }
+            Some(InstanceFacility { name: Some(name), id: None }) => {
+                // Instance has a facility name that didn't resolve — no cluster can match
+                warn!(
+                    "Instance {} has unresolved facility name '{}'",
+                    instance_id, name
+                );
+                false
             }
             _ => {
                 // Instance has no facility scope — accepts all clusters
@@ -426,7 +448,7 @@ mod tests {
 
     use crate::cluster::Cluster;
     use crate::cluster_key::{Tag, TagType};
-    use super::{Distributor, InstanceRow};
+    use super::{Distributor, InstanceFacility, InstanceRow};
 
     fn make_cluster(facility_id: Uuid, show_id: Uuid, tag: &str) -> Cluster {
         Cluster::single_tag(
@@ -439,20 +461,21 @@ mod tests {
         )
     }
 
-    fn make_instance(id: Uuid, facility: Option<&str>, capacity: i32) -> InstanceRow {
+    fn make_instance(id: Uuid, facility: Option<Uuid>, capacity: i32) -> InstanceRow {
         make_instance_with_jobs(id, facility, capacity, 0.0)
     }
 
     fn make_instance_with_jobs(
         id: Uuid,
-        facility: Option<&str>,
+        facility: Option<Uuid>,
         capacity: i32,
         jobs_queried: f64,
     ) -> InstanceRow {
         InstanceRow {
             pk_instance: id,
             str_name: format!("test:{}", id),
-            str_facility: facility.map(String::from),
+            str_facility_id: facility,
+            str_facility: facility.map(|f| f.to_string()),
             int_capacity: capacity,
             float_jobs_queried: jobs_queried,
             b_draining: false,
@@ -537,14 +560,14 @@ mod tests {
         (
             inst_a,
             (
-                make_instance(inst_a, Some(&facility_a.to_string()), 100),
+                make_instance(inst_a, Some(facility_a), 100),
                 0.0,
             ),
         ),
         (
             inst_b,
             (
-                make_instance(inst_b, Some(&facility_b.to_string()), 100),
+                make_instance(inst_b, Some(facility_b), 100),
                 0.0,
             ),
         ),
@@ -906,4 +929,184 @@ mod tests {
         assert_eq!(assignments[&clusters[2].id], inst_a);
         assert_eq!(assignments[&clusters[3].id], inst_a);
     }
+
+    // --- is_facility_eligible unit tests ---
+
+    #[test]
+    fn test_facility_eligible_matching_facility() {
+        let facility = Uuid::new_v4();
+        let show = Uuid::new_v4();
+        let cluster = make_cluster(facility, show, "tag");
+        let instance_id = Uuid::new_v4();
+
+        let facilities = HashMap::from([(instance_id, InstanceFacility {
+            name: Some(facility.to_string()),
+            id: Some(facility),
+        })]);
+
+        assert!(Distributor::is_facility_eligible(&cluster, &facilities, instance_id));
+    }
+
+    #[test]
+    fn test_facility_eligible_mismatched_facility() {
+        let facility_a = Uuid::new_v4();
+        let facility_b = Uuid::new_v4();
+        let show = Uuid::new_v4();
+        let cluster = make_cluster(facility_a, show, "tag");
+        let instance_id = Uuid::new_v4();
+
+        let facilities = HashMap::from([(instance_id, InstanceFacility {
+            name: Some(facility_b.to_string()),
+            id: Some(facility_b),
+        })]);
+
+        assert!(!Distributor::is_facility_eligible(&cluster, &facilities, instance_id));
+    }
+
+    #[test]
+    fn test_facility_eligible_unscoped_instance_accepts_all() {
+        let facility = Uuid::new_v4();
+        let show = Uuid::new_v4();
+        let cluster = make_cluster(facility, show, "tag");
+        let instance_id = Uuid::new_v4();
+
+        let facilities = HashMap::from([(instance_id, InstanceFacility {
+            name: None,
+            id: None,
+        })]);
+
+
assert!(Distributor::is_facility_eligible(&cluster, &facilities, instance_id)); + } + + #[test] + fn test_facility_eligible_unresolved_name_rejects() { + let facility = Uuid::new_v4(); + let show = Uuid::new_v4(); + let cluster = make_cluster(facility, show, "tag"); + let instance_id = Uuid::new_v4(); + + // Instance has a facility name but it didn't resolve to a UUID + let facilities = HashMap::from([(instance_id, InstanceFacility { + name: Some("nonexistent".to_string()), + id: None, + })]); + + assert!(!Distributor::is_facility_eligible(&cluster, &facilities, instance_id)); + } + + // --- compute_assignments facility tests --- + + fn make_instance_unresolved_facility(id: Uuid, name: &str, capacity: i32) -> InstanceRow { + InstanceRow { + pk_instance: id, + str_name: format!("test:{}", id), + str_facility_id: None, + str_facility: Some(name.to_string()), + int_capacity: capacity, + float_jobs_queried: 0.0, + b_draining: false, + } + } + + #[test] + fn test_facility_scoped_instances_no_cross_assignment() { + let facility_a = Uuid::new_v4(); + let facility_b = Uuid::new_v4(); + let show = Uuid::new_v4(); + + // 3 clusters per facility + let clusters_a: Vec = (0..3) + .map(|i| make_cluster(facility_a, show, &format!("a_tag{}", i))) + .collect(); + let clusters_b: Vec = (0..3) + .map(|i| make_cluster(facility_b, show, &format!("b_tag{}", i))) + .collect(); + let all_clusters: Vec = clusters_a.iter().chain(&clusters_b).cloned().collect(); + + let inst_a = Uuid::new_v4(); + let inst_b = Uuid::new_v4(); + let rated_instances: HashMap = [ + (inst_a, (make_instance(inst_a, Some(facility_a), 100), 0.0)), + (inst_b, (make_instance(inst_b, Some(facility_b), 100), 0.0)), + ] + .into(); + + let assignments = + Distributor::compute_assignments(&all_clusters, &rated_instances, &HashMap::new()); + + assert_eq!(assignments.len(), 6); + + // facility_a clusters must go to inst_a, facility_b clusters to inst_b + for c in &clusters_a { + assert_eq!( + assignments[&c.id], inst_a, + "Cluster {} (facility_a) should be on inst_a", + c.id + ); + } + for c in &clusters_b { + assert_eq!( + assignments[&c.id], inst_b, + "Cluster {} (facility_b) should be on inst_b", + c.id + ); + } + } + + #[test] + fn test_unresolved_facility_instance_gets_no_clusters() { + let facility = Uuid::new_v4(); + let show = Uuid::new_v4(); + + let clusters: Vec = (0..4) + .map(|i| make_cluster(facility, show, &format!("tag{}", i))) + .collect(); + + let inst_good = Uuid::new_v4(); + let inst_bad = Uuid::new_v4(); + let rated_instances: HashMap = [ + (inst_good, (make_instance(inst_good, None, 100), 0.0)), + ( + inst_bad, + (make_instance_unresolved_facility(inst_bad, "bogus", 100), 0.0), + ), + ] + .into(); + + let assignments = + Distributor::compute_assignments(&clusters, &rated_instances, &HashMap::new()); + + // All clusters should go to inst_good; inst_bad has an unresolved facility + assert_eq!(assignments.len(), 4); + for id in assignments.values() { + assert_eq!(*id, inst_good); + } + } + + #[test] + fn test_mixed_scoped_and_unscoped_instances() { + let facility_a = Uuid::new_v4(); + let facility_b = Uuid::new_v4(); + let show = Uuid::new_v4(); + + let cluster_a = make_cluster(facility_a, show, "tag_a"); + let cluster_b = make_cluster(facility_b, show, "tag_b"); + let clusters = vec![cluster_a.clone(), cluster_b.clone()]; + + // inst_scoped only handles facility_a, inst_unscoped handles anything + let inst_scoped = Uuid::new_v4(); + let inst_unscoped = Uuid::new_v4(); + let rated_instances: HashMap = [ + (inst_scoped, 
(make_instance(inst_scoped, Some(facility_a), 100), 0.0)), + (inst_unscoped, (make_instance(inst_unscoped, None, 100), 0.0)), + ] + .into(); + + let assignments = + Distributor::compute_assignments(&clusters, &rated_instances, &HashMap::new()); + + assert_eq!(assignments.len(), 2); + // cluster_a can go to either (both are eligible), but cluster_b can only go to unscoped + assert_eq!(assignments[&cluster_b.id], inst_unscoped); + } } From ecdc57eabd2b517dce80b2f3b8882d08d3ca305a Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Fri, 10 Apr 2026 14:22:32 -0700 Subject: [PATCH 12/16] Refactor facility resolution on cluster assignments --- ...V39__Add_scheduler_orchestrator_tables.sql | 2 +- rust/crates/scheduler/src/orchestrator/dao.rs | 14 +-- .../scheduler/src/orchestrator/distributor.rs | 109 ++--------------- .../scheduler/src/orchestrator/instance.rs | 33 ++++-- .../scheduler/tests/integration_tests.rs | 110 +++++++++++++++++- 5 files changed, 149 insertions(+), 119 deletions(-) diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_scheduler_orchestrator_tables.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_scheduler_orchestrator_tables.sql index 23ec014ff..576096c54 100644 --- a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_scheduler_orchestrator_tables.sql +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_scheduler_orchestrator_tables.sql @@ -3,7 +3,7 @@ CREATE TABLE scheduler_instance ( pk_instance UUID PRIMARY KEY, str_name VARCHAR(256) NOT NULL, - str_facility VARCHAR(256), + pk_facility VARCHAR(36) REFERENCES facility(pk_facility), ts_heartbeat TIMESTAMPTZ NOT NULL DEFAULT NOW(), ts_registered TIMESTAMPTZ NOT NULL DEFAULT NOW(), int_capacity INTEGER NOT NULL DEFAULT 100, diff --git a/rust/crates/scheduler/src/orchestrator/dao.rs b/rust/crates/scheduler/src/orchestrator/dao.rs index 45ee5865f..a0f4b60d5 100644 --- a/rust/crates/scheduler/src/orchestrator/dao.rs +++ b/rust/crates/scheduler/src/orchestrator/dao.rs @@ -40,8 +40,7 @@ pub struct OrchestratorDao { pub struct InstanceRow { pub pk_instance: Uuid, pub str_name: String, - pub str_facility_id: Option, - pub str_facility: Option, + pub pk_facility: Option, pub int_capacity: i32, pub float_jobs_queried: f64, pub b_draining: bool, @@ -60,7 +59,7 @@ pub struct ClusterAssignmentRow { // --- Instance queries --- static INSERT_INSTANCE: &str = r#" -INSERT INTO scheduler_instance (pk_instance, str_name, str_facility, int_capacity, ts_heartbeat, ts_registered) +INSERT INTO scheduler_instance (pk_instance, str_name, pk_facility, int_capacity, ts_heartbeat, ts_registered) VALUES ($1, $2, $3, $4, NOW(), NOW()) "#; @@ -88,11 +87,10 @@ RETURNING pk_instance "#; static QUERY_LIVE_INSTANCES: &str = r#" -SELECT si.pk_instance, si.str_name, f.pk_facility AS str_facility_id, si.str_facility, si.int_capacity, si.float_jobs_queried, si.b_draining -FROM scheduler_instance si -LEFT JOIN facility f ON LOWER(f.str_name) = LOWER(si.str_facility) -WHERE si.ts_heartbeat >= NOW() - $1::interval - AND si.b_draining = FALSE +SELECT pk_instance, str_name, pk_facility, int_capacity, float_jobs_queried, b_draining +FROM scheduler_instance +WHERE ts_heartbeat >= NOW() - $1::interval + AND b_draining = FALSE "#; // --- Cluster assignment queries --- diff --git a/rust/crates/scheduler/src/orchestrator/distributor.rs b/rust/crates/scheduler/src/orchestrator/distributor.rs index 0e4eccc00..4ad4aaba9 100644 --- a/rust/crates/scheduler/src/orchestrator/distributor.rs +++ 
b/rust/crates/scheduler/src/orchestrator/distributor.rs @@ -21,6 +21,7 @@ use uuid::Uuid; use crate::cluster::{Cluster, ClusterFeed}; use crate::config::CONFIG; +use crate::dao::helpers::parse_uuid; use super::dao::{InstanceRow, OrchestratorDao}; @@ -31,15 +32,6 @@ struct RateSnapshot { timestamp: Instant, } -/// Resolved facility binding for an instance. -/// Captures both the configured facility name and its resolved UUID from the facility table. -struct InstanceFacility { - /// Facility name as configured on the instance (e.g. "spi"). `None` means unscoped. - name: Option, - /// UUID resolved via the facility table. `None` when unscoped or when the name didn't resolve. - id: Option, -} - /// The distributor runs on the leader instance. It loads all clusters from the database, /// reads live instances, computes load rates, and assigns clusters to instances. pub struct Distributor { @@ -286,13 +278,10 @@ impl Distributor { let all_rates_zero = rated_instances.values().all(|(_, rate)| *rate == 0.0); // Build facility map for affinity filtering - let instance_facilities: HashMap = rated_instances + let instance_facilities: HashMap> = rated_instances .iter() .map(|(&id, (inst, _))| { - (id, InstanceFacility { - name: inst.str_facility.clone(), - id: inst.str_facility_id, - }) + (id, inst.pk_facility.as_deref().map(parse_uuid)) }) .collect(); @@ -376,22 +365,14 @@ impl Distributor { /// Checks whether a cluster is eligible to run on a given instance based on facility. fn is_facility_eligible( cluster: &Cluster, - instance_facilities: &HashMap, + instance_facilities: &HashMap>, instance_id: Uuid, ) -> bool { match instance_facilities.get(&instance_id) { - Some(InstanceFacility { name: Some(_), id: Some(facility_id) }) => { - // Instance is scoped to a resolved facility — cluster must match + Some(Some(facility_id)) => { + // Instance is scoped to a facility — cluster must match cluster.facility_id == *facility_id } - Some(InstanceFacility { name: Some(name), id: None }) => { - // Instance has a facility name that didn't resolve — no cluster can match - warn!( - "Instance {} has unresolved facility name '{}'", - instance_id, name - ); - false - } _ => { // Instance has no facility scope — accepts all clusters true @@ -448,7 +429,7 @@ mod tests { use crate::cluster::Cluster; use crate::cluster_key::{Tag, TagType}; - use super::{Distributor, InstanceFacility, InstanceRow}; + use super::{Distributor, InstanceRow}; fn make_cluster(facility_id: Uuid, show_id: Uuid, tag: &str) -> Cluster { Cluster::single_tag( @@ -474,8 +455,7 @@ mod tests { InstanceRow { pk_instance: id, str_name: format!("test:{}", id), - str_facility_id: facility, - str_facility: facility.map(|f| f.to_string()), + pk_facility: facility.map(|f| f.to_string()), int_capacity: capacity, float_jobs_queried: jobs_queried, b_draining: false, @@ -939,10 +919,7 @@ mod tests { let cluster = make_cluster(facility, show, "tag"); let instance_id = Uuid::new_v4(); - let facilities = HashMap::from([(instance_id, InstanceFacility { - name: Some(facility.to_string()), - id: Some(facility), - })]); + let facilities = HashMap::from([(instance_id, Some(facility))]); assert!(Distributor::is_facility_eligible(&cluster, &facilities, instance_id)); } @@ -955,10 +932,7 @@ mod tests { let cluster = make_cluster(facility_a, show, "tag"); let instance_id = Uuid::new_v4(); - let facilities = HashMap::from([(instance_id, InstanceFacility { - name: Some(facility_b.to_string()), - id: Some(facility_b), - })]); + let facilities = HashMap::from([(instance_id, 
Some(facility_b))]); assert!(!Distributor::is_facility_eligible(&cluster, &facilities, instance_id)); } @@ -970,44 +944,13 @@ mod tests { let cluster = make_cluster(facility, show, "tag"); let instance_id = Uuid::new_v4(); - let facilities = HashMap::from([(instance_id, InstanceFacility { - name: None, - id: None, - })]); + let facilities: HashMap> = HashMap::from([(instance_id, None)]); assert!(Distributor::is_facility_eligible(&cluster, &facilities, instance_id)); } - #[test] - fn test_facility_eligible_unresolved_name_rejects() { - let facility = Uuid::new_v4(); - let show = Uuid::new_v4(); - let cluster = make_cluster(facility, show, "tag"); - let instance_id = Uuid::new_v4(); - - // Instance has a facility name but it didn't resolve to a UUID - let facilities = HashMap::from([(instance_id, InstanceFacility { - name: Some("nonexistent".to_string()), - id: None, - })]); - - assert!(!Distributor::is_facility_eligible(&cluster, &facilities, instance_id)); - } - // --- compute_assignments facility tests --- - fn make_instance_unresolved_facility(id: Uuid, name: &str, capacity: i32) -> InstanceRow { - InstanceRow { - pk_instance: id, - str_name: format!("test:{}", id), - str_facility_id: None, - str_facility: Some(name.to_string()), - int_capacity: capacity, - float_jobs_queried: 0.0, - b_draining: false, - } - } - #[test] fn test_facility_scoped_instances_no_cross_assignment() { let facility_a = Uuid::new_v4(); @@ -1053,36 +996,6 @@ mod tests { } } - #[test] - fn test_unresolved_facility_instance_gets_no_clusters() { - let facility = Uuid::new_v4(); - let show = Uuid::new_v4(); - - let clusters: Vec = (0..4) - .map(|i| make_cluster(facility, show, &format!("tag{}", i))) - .collect(); - - let inst_good = Uuid::new_v4(); - let inst_bad = Uuid::new_v4(); - let rated_instances: HashMap = [ - (inst_good, (make_instance(inst_good, None, 100), 0.0)), - ( - inst_bad, - (make_instance_unresolved_facility(inst_bad, "bogus", 100), 0.0), - ), - ] - .into(); - - let assignments = - Distributor::compute_assignments(&clusters, &rated_instances, &HashMap::new()); - - // All clusters should go to inst_good; inst_bad has an unresolved facility - assert_eq!(assignments.len(), 4); - for id in assignments.values() { - assert_eq!(*id, inst_good); - } - } - #[test] fn test_mixed_scoped_and_unscoped_instances() { let facility_a = Uuid::new_v4(); diff --git a/rust/crates/scheduler/src/orchestrator/instance.rs b/rust/crates/scheduler/src/orchestrator/instance.rs index 223570806..9c73603f2 100644 --- a/rust/crates/scheduler/src/orchestrator/instance.rs +++ b/rust/crates/scheduler/src/orchestrator/instance.rs @@ -12,12 +12,13 @@ use std::sync::Arc; -use miette::{IntoDiagnostic, Result}; +use miette::{IntoDiagnostic, Result, WrapErr}; use tokio::sync::watch; use tokio::task::JoinHandle; use tracing::{error, info, warn}; use uuid::Uuid; +use crate::cluster::get_facility_id; use crate::config::CONFIG; use crate::metrics::JOBS_QUERIED_TOTAL; @@ -29,7 +30,8 @@ use super::dao::OrchestratorDao; pub struct InstanceManager { pub instance_id: Uuid, instance_name: String, - facility: Option, + /// Resolved facility UUID (as string), or None if unscoped. + facility_id: Option, capacity: i32, dao: Arc, } @@ -39,16 +41,17 @@ impl InstanceManager { /// /// Generates a unique instance ID, builds an instance name from hostname and PID, /// and initializes the orchestrator DAO for database operations. + /// If a facility name is provided, it is resolved to its UUID immediately. 
/// /// # Arguments /// - /// * `facility` - Optional facility name to scope this instance to a specific facility + /// * `facility_name` - Optional facility name to scope this instance to a specific facility /// /// # Returns /// /// * `Ok(InstanceManager)` - Successfully created instance manager - /// * `Err(miette::Error)` - Failed to establish database connection - pub async fn new(facility: Option) -> Result { + /// * `Err(miette::Error)` - Failed to establish database connection or resolve facility name + pub async fn new(facility_name: Option) -> Result { let instance_id = Uuid::new_v4(); let hostname = gethostname::gethostname().to_string_lossy().to_string(); let pid = std::process::id(); @@ -56,10 +59,20 @@ impl InstanceManager { let capacity = CONFIG.orchestrator.capacity as i32; let dao = Arc::new(OrchestratorDao::new().await?); + let facility_id = match &facility_name { + Some(name) => Some( + get_facility_id(name) + .await + .wrap_err_with(|| format!("facility '{}' not found", name))? + .to_string(), + ), + None => None, + }; + Ok(InstanceManager { instance_id, instance_name, - facility, + facility_id, capacity, dao, }) @@ -67,7 +80,7 @@ impl InstanceManager { /// Creates an instance manager with an externally provided DAO. /// Useful for testing with an embedded database. - pub fn with_dao(dao: Arc, facility: Option, capacity: i32) -> Self { + pub fn with_dao(dao: Arc, facility_id: Option, capacity: i32) -> Self { let instance_id = Uuid::new_v4(); let hostname = gethostname::gethostname().to_string_lossy().to_string(); let pid = std::process::id(); @@ -75,7 +88,7 @@ impl InstanceManager { InstanceManager { instance_id, instance_name, - facility, + facility_id, capacity, dao, } @@ -95,7 +108,7 @@ impl InstanceManager { .register_instance( self.instance_id, &self.instance_name, - self.facility.as_deref(), + self.facility_id.as_deref(), self.capacity, ) .await @@ -103,7 +116,7 @@ impl InstanceManager { info!( instance_id = %self.instance_id, name = %self.instance_name, - facility = ?self.facility, + facility_id = ?self.facility_id, capacity = self.capacity, "Registered scheduler instance" ); diff --git a/rust/crates/scheduler/tests/integration_tests.rs b/rust/crates/scheduler/tests/integration_tests.rs index 8a24aef1c..f3d658fbc 100644 --- a/rust/crates/scheduler/tests/integration_tests.rs +++ b/rust/crates/scheduler/tests/integration_tests.rs @@ -138,7 +138,7 @@ mod orchestration_tests { let (pool, url) = create_test_db("deregister").await; let dao = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); - let mgr = InstanceManager::with_dao(dao.clone(), Some("test-facility".to_string()), 100); + let mgr = InstanceManager::with_dao(dao.clone(), None, 100); mgr.register().await.expect("register should succeed"); // Verify it exists @@ -162,7 +162,7 @@ mod orchestration_tests { let mgr1 = InstanceManager::with_dao(dao.clone(), None, 100); let mgr2 = InstanceManager::with_dao(dao.clone(), None, 200); - let mgr3 = InstanceManager::with_dao(dao.clone(), Some("facility-a".to_string()), 50); + let mgr3 = InstanceManager::with_dao(dao.clone(), None, 50); mgr1.register().await.unwrap(); mgr2.register().await.unwrap(); @@ -183,6 +183,112 @@ mod orchestration_tests { assert!(!instances.contains_key(&mgr2.instance_id)); } + // ── Test: Facility-scoped instance registration ─────────────────────── + + #[tokio::test] + async fn test_facility_scoped_instance_registration() { + let (pool, url) = create_test_db("facility_register").await; + let dao = 
Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + + // Well-known facility UUIDs from seed data + let local_facility_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1"; + let cloud_facility_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0"; + + let mgr_local = InstanceManager::with_dao(dao.clone(), Some(local_facility_id.to_string()), 100); + let mgr_cloud = InstanceManager::with_dao(dao.clone(), Some(cloud_facility_id.to_string()), 100); + let mgr_unscoped = InstanceManager::with_dao(dao.clone(), None, 100); + + mgr_local.register().await.unwrap(); + mgr_cloud.register().await.unwrap(); + mgr_unscoped.register().await.unwrap(); + + let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap(); + assert_eq!(instances.len(), 3); + + // Verify facility IDs are stored correctly + assert_eq!( + instances[&mgr_local.instance_id].pk_facility.as_deref(), + Some(local_facility_id) + ); + assert_eq!( + instances[&mgr_cloud.instance_id].pk_facility.as_deref(), + Some(cloud_facility_id) + ); + assert_eq!( + instances[&mgr_unscoped.instance_id].pk_facility, + None + ); + } + + // ── Test: Facility-scoped cluster assignment ──────────────────────── + + #[tokio::test] + async fn test_facility_scoped_cluster_assignment() { + let (pool, url) = create_test_db("facility_assign").await; + let dao = Arc::new(OrchestratorDao::with_pool(pool.clone(), url)); + + // Well-known facility UUIDs from seed data + let local_facility_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1"; + let cloud_facility_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa0"; + let local_uuid = Uuid::parse_str(local_facility_id).unwrap(); + let cloud_uuid = Uuid::parse_str(cloud_facility_id).unwrap(); + + // Register facility-scoped instances + let mgr_local = InstanceManager::with_dao(dao.clone(), Some(local_facility_id.to_string()), 100); + let mgr_cloud = InstanceManager::with_dao(dao.clone(), Some(cloud_facility_id.to_string()), 100); + mgr_local.register().await.unwrap(); + mgr_cloud.register().await.unwrap(); + + // Create clusters scoped to each facility + let show = Uuid::new_v4(); + let local_clusters: Vec = (0..3) + .map(|i| Cluster::single_tag( + local_uuid, + show, + Tag { name: format!("local_tag{}", i), ttype: TagType::Alloc }, + )) + .collect(); + let cloud_clusters: Vec = (0..3) + .map(|i| Cluster::single_tag( + cloud_uuid, + show, + Tag { name: format!("cloud_tag{}", i), ttype: TagType::Alloc }, + )) + .collect(); + let all_clusters: Vec = local_clusters.iter().chain(&cloud_clusters).cloned().collect(); + + // Compute assignments + let instances = dao.get_live_instances(Duration::from_secs(30)).await.unwrap(); + let rated = rated_instances_from_dao(instances); + let assignments = Distributor::compute_assignments(&all_clusters, &rated, &HashMap::new()); + + assert_eq!(assignments.len(), 6); + + // Local clusters must go to local instance, cloud clusters to cloud instance + for c in &local_clusters { + assert_eq!( + assignments[&c.id], mgr_local.instance_id, + "local cluster {} should be assigned to local instance", + c.id + ); + } + for c in &cloud_clusters { + assert_eq!( + assignments[&c.id], mgr_cloud.instance_id, + "cloud cluster {} should be assigned to cloud instance", + c.id + ); + } + + // Write assignments to DB and verify round-trip + for cluster in &all_clusters { + let inst = assignments[&cluster.id]; + dao.upsert_assignment(inst, cluster).await.unwrap(); + } + let stored = dao.get_all_assignments().await.unwrap(); + assert_eq!(stored.len(), 6); + } + // ── Test 5: Leader election loop (live async) 
────────────────────────
 
     #[tokio::test]

From f29a7521639207920eaedd56a0de7b387e53544b Mon Sep 17 00:00:00 2001
From: Diego Tavares
Date: Fri, 10 Apr 2026 15:20:46 -0700
Subject: [PATCH 13/16] Refactor cluster_dao

---
 rust/crates/scheduler/src/cluster.rs          | 34 +------------------
 rust/crates/scheduler/src/dao/cluster_dao.rs  |  4 +--
 rust/crates/scheduler/src/main.rs             | 21 +++++++++---
 .../scheduler/src/orchestrator/instance.rs    | 18 ++++++----
 4 files changed, 30 insertions(+), 47 deletions(-)

diff --git a/rust/crates/scheduler/src/cluster.rs b/rust/crates/scheduler/src/cluster.rs
index bf30a4159..57a9b40fb 100644
--- a/rust/crates/scheduler/src/cluster.rs
+++ b/rust/crates/scheduler/src/cluster.rs
@@ -21,7 +21,7 @@ use std::{
 
 use futures::StreamExt;
 use itertools::Itertools;
-use miette::{IntoDiagnostic, Result};
+use miette::Result;
 use serde::{Deserialize, Serialize};
 use tokio::sync::mpsc;
 use tracing::{debug, error, warn};
@@ -619,35 +619,3 @@ impl ClusterFeed {
     }
 }
 
-/// Looks up a facility ID by facility name.
-///
-/// # Arguments
-///
-/// * `facility_name` - The name of the facility
-///
-/// # Returns
-///
-/// * `Ok(Uuid)` - The facility ID
-/// * `Err(miette::Error)` - If facility not found or database error
-pub async fn get_facility_id(facility_name: &str) -> Result<Uuid> {
-    let cluster_dao = ClusterDao::new().await?;
-    cluster_dao
-        .get_facility_id(facility_name)
-        .await
-        .into_diagnostic()
-}
-
-/// Looks up a show ID by show name.
-///
-/// # Arguments
-///
-/// * `show_name` - The name of the show
-///
-/// # Returns
-///
-/// * `Ok(Uuid)` - The show ID
-/// * `Err(miette::Error)` - If show not found or database error
-pub async fn get_show_id(show_name: &str) -> Result<Uuid> {
-    let cluster_dao = ClusterDao::new().await?;
-    cluster_dao.get_show_id(show_name).await.into_diagnostic()
-}
diff --git a/rust/crates/scheduler/src/dao/cluster_dao.rs b/rust/crates/scheduler/src/dao/cluster_dao.rs
index 716b175ad..fec81d4ad 100644
--- a/rust/crates/scheduler/src/dao/cluster_dao.rs
+++ b/rust/crates/scheduler/src/dao/cluster_dao.rs
@@ -296,12 +296,12 @@ impl ClusterDao {
     ///
     /// * `Ok(Uuid)` - The facility ID
     /// * `Err(sqlx::Error)` - If facility not found or database error
-    pub async fn get_facility_id(&self, facility_name: &str) -> Result<Uuid, sqlx::Error> {
+    pub async fn get_facility_id(&self, facility_name: &str) -> Result<String, sqlx::Error> {
         let row: (String,) = sqlx::query_as(QUERY_FACILITY_ID)
             .bind(facility_name)
             .fetch_one(&*self.connection_pool)
             .await?;
-        Ok(parse_uuid(&row.0))
+        Ok(row.0)
     }
 
     /// Looks up a show ID by show name.
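// Net effect for callers — a minimal sketch, assuming a `cluster_dao` handle
// (the "spi" facility name is illustrative): the DAO now hands back the raw
// VARCHAR(36) key, and the caller decides when to parse it, e.g.
//
//   let facility_id: String = cluster_dao.get_facility_id("spi").await?;
//   let facility_uuid = parse_uuid(&facility_id);
//
// which is the pattern the main.rs hunk below adopts.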
diff --git a/rust/crates/scheduler/src/main.rs b/rust/crates/scheduler/src/main.rs index b60c20136..a5ee7184c 100644 --- a/rust/crates/scheduler/src/main.rs +++ b/rust/crates/scheduler/src/main.rs @@ -25,6 +25,7 @@ use crate::{ cluster::{Cluster, ClusterFeed}, cluster_key::{Tag, TagType}, config::CONFIG, + dao::{helpers::parse_uuid, ClusterDao}, }; mod cluster; @@ -184,12 +185,18 @@ impl JobQueueCli { return orchestrator::run(facility, ignore_tags).await; } + let cluster_dao = ClusterDao::new().await?; + // Lookup facility_id from facility name let facility_id = match &facility { Some(facility) => Some( - cluster::get_facility_id(facility) - .await - .wrap_err("Invalid facility name")?, + parse_uuid( + &cluster_dao + .get_facility_id(facility) + .await + .into_diagnostic() + .wrap_err("Invalid facility name")?, + ), ), None => None, }; @@ -199,8 +206,10 @@ impl JobQueueCli { if let Some(facility_id) = &facility_id { // Build Cluster::ComposedKey for each alloc_tag (show:tag format) for alloc_tag in &alloc_tags { - let show_id = cluster::get_show_id(&alloc_tag.show) + let show_id = cluster_dao + .get_show_id(&alloc_tag.show) .await + .into_diagnostic() .wrap_err(format!("Could not find show {}.", alloc_tag.show))?; clusters.push(Cluster::single_tag( *facility_id, @@ -214,8 +223,10 @@ impl JobQueueCli { // Build Cluster::TagsKey for manual_tags for manual_tag in &manual_tags { - let show_id = cluster::get_show_id(&manual_tag.show) + let show_id = cluster_dao + .get_show_id(&manual_tag.show) .await + .into_diagnostic() .wrap_err(format!("Could not find show {}.", manual_tag.show))?; clusters.push(Cluster::from_tags( *facility_id, diff --git a/rust/crates/scheduler/src/orchestrator/instance.rs b/rust/crates/scheduler/src/orchestrator/instance.rs index 9c73603f2..37a74448a 100644 --- a/rust/crates/scheduler/src/orchestrator/instance.rs +++ b/rust/crates/scheduler/src/orchestrator/instance.rs @@ -18,8 +18,8 @@ use tokio::task::JoinHandle; use tracing::{error, info, warn}; use uuid::Uuid; -use crate::cluster::get_facility_id; use crate::config::CONFIG; +use crate::dao::ClusterDao; use crate::metrics::JOBS_QUERIED_TOTAL; use super::dao::OrchestratorDao; @@ -60,12 +60,16 @@ impl InstanceManager { let dao = Arc::new(OrchestratorDao::new().await?); let facility_id = match &facility_name { - Some(name) => Some( - get_facility_id(name) - .await - .wrap_err_with(|| format!("facility '{}' not found", name))? - .to_string(), - ), + Some(name) => { + let cluster_dao = ClusterDao::new().await?; + Some( + cluster_dao + .get_facility_id(name) + .await + .into_diagnostic() + .wrap_err_with(|| format!("facility '{}' not found", name))?, + ) + } None => None, }; From 16df10a51dc2e2ad0199c1018fa1b5efd7c0d8aa Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Fri, 10 Apr 2026 15:39:40 -0700 Subject: [PATCH 14/16] Only load clusters scoped to facilities serviced by at least one instance --- .../scheduler/src/orchestrator/distributor.rs | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/rust/crates/scheduler/src/orchestrator/distributor.rs b/rust/crates/scheduler/src/orchestrator/distributor.rs index 4ad4aaba9..5d46ab20f 100644 --- a/rust/crates/scheduler/src/orchestrator/distributor.rs +++ b/rust/crates/scheduler/src/orchestrator/distributor.rs @@ -10,7 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under // the License. 
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::time::{Duration, Instant};
 
 use rand::Rng;
@@ -164,6 +164,30 @@ impl Distributor {
             return Ok(());
         }
 
+        // Filter clusters to only those whose facility has at least one live instance.
+        // This avoids warning spam when running facility-scoped instances that can't
+        // serve clusters from other facilities.
+        let has_unscoped_instance = instances.values().any(|inst| inst.pk_facility.is_none());
+        let all_clusters = if has_unscoped_instance {
+            // An unscoped instance accepts all clusters, so no filtering needed
+            all_clusters
+        } else {
+            let covered_facilities: HashSet<Uuid> = instances
+                .values()
+                .filter_map(|inst| inst.pk_facility.as_deref().map(parse_uuid))
+                .collect();
+            let (eligible, skipped): (Vec<_>, Vec<_>) = all_clusters
+                .into_iter()
+                .partition(|c| covered_facilities.contains(&c.facility_id));
+            if !skipped.is_empty() {
+                debug!(
+                    "Skipped {} cluster(s) from facilities with no live instance",
+                    skipped.len()
+                );
+            }
+            eligible
+        };
+
         // Read current assignments, expired included (cluster_id -> instance_id)
         let current_assignments = dao.get_all_assignments().await.into_diagnostic()?;

From ab4d1b3f2f6b39512d502e4c66ebe979a25c088b Mon Sep 17 00:00:00 2001
From: Diego Tavares
Date: Mon, 20 Apr 2026 10:16:59 -0700
Subject: [PATCH 15/16] Apply fixes suggested by Code Rabbit

---
 rust/crates/scheduler/src/cluster.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rust/crates/scheduler/src/cluster.rs b/rust/crates/scheduler/src/cluster.rs
index 57a9b40fb..569f45cc7 100644
--- a/rust/crates/scheduler/src/cluster.rs
+++ b/rust/crates/scheduler/src/cluster.rs
@@ -88,6 +88,8 @@ impl Cluster {
 
     /// Creates a cluster from explicitly provided tags (e.g. CLI arguments).
     /// The ID is derived from the sorted tag names.
+    /// **Attention:** If `tags` contains tags of different `ttype`s, the generated ID will only
+    /// contain the type of the first tag, which is unexpected behavior.
     pub fn from_tags(facility_id: Uuid, show_id: Uuid, tags: Vec<Tag>) -> Self {
         let tag_type = tags.first().map_or("unknown", |t| t.ttype.as_str());
         let sorted_tags: BTreeSet<Tag> = tags.into_iter().collect();
@@ -246,8 +248,8 @@ impl ClusterFeed {
         {
             let mut clusters = self.clusters.write().unwrap_or_else(|p| p.into_inner());
             *clusters = new_clusters;
+            self.current_index.store(0, Ordering::Relaxed);
         }
-        self.current_index.store(0, Ordering::Relaxed);
     }
 
     /// Returns a builder for a feed scoped to the given facility.
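// Why the index reset moved inside the lock scope: with the store outside,
// a reader could briefly observe the swapped-in cluster list while
// current_index still pointed past its end. Resetting the cursor while the
// write guard is still held keeps list and index consistent for readers.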
@@ -312,7 +314,6 @@ impl ClusterFeed {
         ignore_tags: &[String],
         shows_filter: Option<Vec<Uuid>>,
     ) -> Result<Vec<Cluster>> {
-        // Fetch clusters for alloc and non_alloc tags
         let mut clusters_stream = cluster_dao
             .fetch_alloc_clusters(facility_id, shows_filter.clone())
@@ -618,4 +619,3 @@ impl ClusterFeed {
         cancel_sender
     }
 }
-

From fadeaacf319fb6b92a3dfb87623382213bad1031 Mon Sep 17 00:00:00 2001
From: Diego Tavares
Date: Mon, 20 Apr 2026 10:18:38 -0700
Subject: [PATCH 16/16] Rename migrations to move ours to the last position

---
 ...finished_jobs_index.sql => V39__Add_unfinished_jobs_index.sql} | 0
 ...ator_tables.sql => V40__Add_scheduler_orchestrator_tables.sql} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename cuebot/src/main/resources/conf/ddl/postgres/migrations/{V40__Add_unfinished_jobs_index.sql => V39__Add_unfinished_jobs_index.sql} (100%)
 rename cuebot/src/main/resources/conf/ddl/postgres/migrations/{V39__Add_scheduler_orchestrator_tables.sql => V40__Add_scheduler_orchestrator_tables.sql} (100%)

diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V40__Add_unfinished_jobs_index.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_unfinished_jobs_index.sql
similarity index 100%
rename from cuebot/src/main/resources/conf/ddl/postgres/migrations/V40__Add_unfinished_jobs_index.sql
rename to cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_unfinished_jobs_index.sql
diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_scheduler_orchestrator_tables.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V40__Add_scheduler_orchestrator_tables.sql
similarity index 100%
rename from cuebot/src/main/resources/conf/ddl/postgres/migrations/V39__Add_scheduler_orchestrator_tables.sql
rename to cuebot/src/main/resources/conf/ddl/postgres/migrations/V40__Add_scheduler_orchestrator_tables.sql