
Commit 8fef217

haitrroxyhelper[bot] authored and committed
fix: azure openai not working with new agentic workflow (#2189)
* fix: azure openai not working with new agentic workflow
* fix: resuming
* fix: warning

GitOrigin-RevId: 9124657a9da1cbbc4800c3f6e3493fd38036f090
1 parent 64abc37 commit 8fef217

7 files changed: 251 additions & 48 deletions
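
The core of the change is the shape of the endpoint URL: Azure OpenAI does not serve `{base}/chat/completions` like other OpenAI-compatible backends; the deployment name goes into the path and the API version into a query parameter. A standalone sketch of the two URL shapes (example values only, not taken from this commit):

fn main() {
    // Standard OpenAI-compatible backend (what OpenAiCompatProvider::new targets).
    let base = "http://localhost:11434/v1";
    let standard = format!("{base}/chat/completions");

    // Azure OpenAI (what the new OpenAiCompatProvider::for_azure constructs);
    // the resource, deployment, and version here are made-up examples.
    let resource = "https://myresource.openai.azure.com";
    let (deployment, api_version) = ("my-deployment", "2024-05-01-preview");
    let azure = format!(
        "{resource}/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
    );

    println!("{standard}\n{azure}");
}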

crates/agentic/analytics/src/config/mod.rs

Lines changed: 51 additions & 2 deletions
@@ -226,6 +226,12 @@ pub struct ResolvedModelInfo {
     /// Used to decide vendor precedence: when a ref is set the ref's vendor
     /// is preferred even if `llm.model` is also explicitly overridden.
     pub is_explicit_ref: bool,
+    /// Azure deployment ID (e.g. `"my-gpt4o-deployment"`). Present only for
+    /// Azure OpenAI models configured with `azure_deployment_id` in config.yml.
+    pub azure_deployment_id: Option<String>,
+    /// Azure API version (e.g. `"2025-03-01-preview"`). Present only for
+    /// Azure OpenAI models configured with `azure_api_version` in config.yml.
+    pub azure_api_version: Option<String>,
 }
 
 // ── BuildContext ──────────────────────────────────────────────────────────────
@@ -286,12 +292,40 @@ fn build_engine(cfg: &SemanticEngineConfig) -> Result<Box<dyn SemanticEngine>, C
 ///
 /// Extracted so it can be called both for the global client and for per-state
 /// model overrides (which inherit vendor, key, and base_url).
+///
+/// When `azure_deployment_id` and `azure_api_version` are both `Some`, the
+/// model is Azure OpenAI: `OpenAiCompatProvider` is used with the full Azure
+/// Chat Completions URL regardless of `vendor`.
 fn build_llm_client(
     vendor: &LlmVendor,
     api_key: &str,
     model: &str,
     base_url: Option<&str>,
+    azure_deployment_id: Option<&str>,
+    azure_api_version: Option<&str>,
 ) -> LlmClient {
+    if let (Some(deployment_id), Some(api_version), Some(base)) =
+        (azure_deployment_id, azure_api_version, base_url)
+    {
+        return LlmClient::with_provider(OpenAiCompatProvider::for_azure(
+            api_key,
+            model,
+            base,
+            deployment_id,
+            api_version,
+        ));
+    }
+    if azure_deployment_id.is_some() && azure_api_version.is_some() && base_url.is_none() {
+        tracing::warn!(
+            "Azure config has deployment_id and api_version set but no base_url; \
+             falling back to standard OpenAI."
+        );
+    } else if azure_deployment_id.is_some() != azure_api_version.is_some() {
+        tracing::warn!(
+            "Azure config is incomplete: both azure_deployment_id and azure_api_version must \
+             be set together. Falling back to standard OpenAI."
+        );
+    }
     match vendor {
         LlmVendor::Anthropic => LlmClient::with_model(api_key, model),
         LlmVendor::OpenAi => {
@@ -489,10 +523,21 @@ impl AgentConfig {
             .as_deref()
             .or(pmi.as_ref().and_then(|m| m.base_url.as_deref()));
 
-        let client = build_llm_client(effective_vendor, &api_key, &model, effective_base_url);
+        // Azure fields from the project model config (not overridable per-state).
+        let azure_deployment_id = pmi.as_ref().and_then(|m| m.azure_deployment_id.as_deref());
+        let azure_api_version = pmi.as_ref().and_then(|m| m.azure_api_version.as_deref());
+
+        let client = build_llm_client(
+            effective_vendor,
+            &api_key,
+            &model,
+            effective_base_url,
+            azure_deployment_id,
+            azure_api_version,
+        );
 
         // Build per-state clients for states that declare a `model:` override.
-        // Inherits vendor / api_key / base_url from the global config.
+        // Inherits vendor / api_key / base_url / azure config from the global config.
        let state_clients: std::collections::HashMap<String, LlmClient> = self
            .states
            .iter()
@@ -503,6 +548,8 @@ impl AgentConfig {
                    &api_key,
                    state_model,
                    effective_base_url,
+                    azure_deployment_id,
+                    azure_api_version,
                );
                (state_name.clone(), c)
            })
@@ -559,6 +606,8 @@ impl AgentConfig {
                &api_key,
                override_model,
                effective_base_url,
+                azure_deployment_id,
+                azure_api_version,
            );
            solver = solver.with_client_override(override_client);
        }
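
For reference, the dispatch precedence introduced above can be summarised as: a complete Azure triple (deployment ID, API version, base URL) wins over the declared vendor, while an incomplete Azure config logs a warning and falls through to the normal vendor match. A standalone mirror of that check (illustrative only, not the crate's code):

// Mirrors build_llm_client's Azure detection with plain strings instead of the crate's types.
fn azure_route(dep: Option<&str>, ver: Option<&str>, base: Option<&str>) -> &'static str {
    match (dep, ver, base) {
        // All three present: route to the Azure Chat Completions URL.
        (Some(_), Some(_), Some(_)) => "azure",
        // Deployment + version but no base_url: warn, fall back to the vendor match.
        (Some(_), Some(_), None) => "fallback (missing base_url)",
        // Only one of the two Azure fields: warn, fall back to the vendor match.
        (Some(_), None, _) | (None, Some(_), _) => "fallback (incomplete pair)",
        // No Azure fields at all: plain vendor dispatch.
        (None, None, _) => "vendor dispatch",
    }
}

fn main() {
    assert_eq!(azure_route(Some("dep"), Some("2024-02-01"), Some("https://r.openai.azure.com")), "azure");
    assert_eq!(azure_route(Some("dep"), Some("2024-02-01"), None), "fallback (missing base_url)");
    assert_eq!(azure_route(Some("dep"), None, Some("https://r.openai.azure.com")), "fallback (incomplete pair)");
    assert_eq!(azure_route(None, None, None), "vendor dispatch");
}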

crates/agentic/llm/src/openai_compat.rs

Lines changed: 91 additions & 10 deletions
@@ -155,7 +155,7 @@ fn inject_cot(system: &str) -> String {
 // ── OpenAiCompatProvider ──────────────────────────────────────────────────────
 
 /// OpenAI Chat Completions API provider for OpenAI-compatible backends
-/// (Ollama, vLLM, LM Studio, etc.).
+/// (Ollama, vLLM, LM Studio, Azure OpenAI, etc.).
 ///
 /// Uses the `/v1/chat/completions` endpoint which is the de-facto standard for
 /// locally-hosted LLMs. Supports:
@@ -170,9 +170,8 @@ fn inject_cot(system: &str) -> String {
 pub struct OpenAiCompatProvider {
     api_key: String,
     model: String,
-    /// Base URL of the Chat Completions endpoint, e.g.
-    /// `http://localhost:11434/v1` (Ollama) or `http://host:8000/v1` (vLLM).
-    base_url: String,
+    /// Full Chat Completions URL used for every request.
+    completions_url: String,
     client: reqwest::Client,
 }
 
@@ -188,20 +187,58 @@ impl OpenAiCompatProvider {
         base_url: impl Into<String>,
     ) -> Self {
         let mut base = base_url.into();
-        // Normalise: strip trailing slash.
         while base.ends_with('/') {
             base.pop();
         }
         Self {
             api_key: api_key.into(),
             model: model.into(),
-            base_url: base,
+            completions_url: format!("{base}/chat/completions"),
             client: reqwest::Client::new(),
         }
     }
 
-    fn completions_url(&self) -> String {
-        format!("{}/chat/completions", self.base_url)
+    /// Create a provider with an explicit full completions URL.
+    ///
+    /// Use this when the target endpoint cannot be expressed as `{base}/chat/completions`,
+    /// for example Azure OpenAI which requires a deployment path and `api-version` query
+    /// parameter: `https://{resource}.openai.azure.com/openai/deployments/{deployment}/chat/completions?api-version={ver}`.
+    pub fn with_completions_url(
+        api_key: impl Into<String>,
+        model: impl Into<String>,
+        completions_url: impl Into<String>,
+    ) -> Self {
+        Self {
+            api_key: api_key.into(),
+            model: model.into(),
+            completions_url: completions_url.into(),
+            client: reqwest::Client::new(),
+        }
+    }
+
+    /// Create a provider targeting an Azure OpenAI deployment.
+    ///
+    /// Constructs the full Chat Completions URL from the resource endpoint,
+    /// deployment name, and API version. Trailing slashes on `base_url` are
+    /// normalised automatically.
+    pub fn for_azure(
+        api_key: impl Into<String>,
+        model: impl Into<String>,
+        base_url: impl Into<String>,
+        deployment_id: impl AsRef<str>,
+        api_version: impl AsRef<str>,
+    ) -> Self {
+        let mut base = base_url.into();
+        while base.ends_with('/') {
+            base.pop();
+        }
+        let url = format!(
+            "{}/openai/deployments/{}/chat/completions?api-version={}",
+            base,
+            deployment_id.as_ref(),
+            api_version.as_ref()
+        );
+        Self::with_completions_url(api_key, model, url)
     }
 }
 
@@ -301,10 +338,10 @@ impl LlmProvider for OpenAiCompatProvider {
             }
         }
 
-        let url = self.completions_url();
+        let url = &self.completions_url;
         let mut req = self
             .client
-            .post(&url)
+            .post(url.as_str())
             .header("content-type", "application/json");
 
         if !self.api_key.is_empty() {
@@ -548,3 +585,47 @@ impl LlmProvider for OpenAiCompatProvider {
         &self.model
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn for_azure_builds_correct_url() {
+        let p = OpenAiCompatProvider::for_azure(
+            "key",
+            "gpt-4",
+            "https://myresource.openai.azure.com",
+            "my-deployment",
+            "2024-05-01-preview",
+        );
+        assert_eq!(
+            p.completions_url,
+            "https://myresource.openai.azure.com/openai/deployments/my-deployment/chat/completions?api-version=2024-05-01-preview"
+        );
+    }
+
+    #[test]
+    fn for_azure_strips_trailing_slashes() {
+        let p = OpenAiCompatProvider::for_azure(
+            "key",
+            "gpt-4",
+            "https://myresource.openai.azure.com///",
+            "dep",
+            "2024-02-01",
+        );
+        assert_eq!(
+            p.completions_url,
+            "https://myresource.openai.azure.com/openai/deployments/dep/chat/completions?api-version=2024-02-01"
+        );
+    }
+
+    #[test]
+    fn new_strips_trailing_slashes() {
+        let p = OpenAiCompatProvider::new("key", "model", "http://localhost:11434/v1//");
+        assert_eq!(
+            p.completions_url,
+            "http://localhost:11434/v1/chat/completions"
+        );
+    }
+}
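
The new `with_completions_url` constructor is the general escape hatch that `for_azure` builds on; it can also target any other endpoint that does not follow the `{base}/chat/completions` convention. A hypothetical usage sketch, assuming the crate's `OpenAiCompatProvider` and `LlmClient` are in scope (the gateway URL and model name are invented for illustration):

fn custom_endpoint_client() -> LlmClient {
    // Nothing is appended to the URL passed here; it is used verbatim for every request.
    let provider = OpenAiCompatProvider::with_completions_url(
        "my-api-key",
        "my-model",
        "https://gateway.example.com/llm/v1/chat/completions?tenant=abc",
    );
    LlmClient::with_provider(provider)
}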

crates/agentic/pipeline/src/lib.rs

Lines changed: 22 additions & 10 deletions
@@ -586,7 +586,10 @@ impl PipelineBuilder {
         // created the run via insert_run_with_parent.
         let source_type = "builder";
         if !skip_db_insert {
-            let metadata = serde_json::json!({ "agent_id": "__builder__" });
+            let metadata = serde_json::json!({
+                "agent_id": "__builder__",
+                "model": model,
+            });
             agentic_runtime::crud::insert_run(
                 db,
                 run_id,
@@ -656,18 +659,27 @@ impl PipelineBuilder {
 
 /// Resolve the builder domain's LLM client via the platform port.
 ///
-/// Preserves the legacy fallback: if no model config matches, default to
-/// `claude-sonnet-4-6` with the key read from `ANTHROPIC_API_KEY`.
+/// Tries the explicit model ref first, then the project's configured default.
+/// Never falls back to a hardcoded provider.
 async fn build_builder_llm_client(ctx: &dyn ProjectContext, model: Option<String>) -> LlmClient {
-    let model_name = model.unwrap_or_else(|| "claude-sonnet-4-6".to_string());
-    if let Some(info) = ctx.resolve_model(Some(&model_name), false).await {
+    // Try explicit model ref, then project default.
+    let info = if let Some(ref name) = model {
+        match ctx.resolve_model(Some(name), false).await {
+            Some(info) => Some(info),
+            None => ctx.resolve_model(None, false).await,
+        }
+    } else {
+        ctx.resolve_model(None, false).await
+    };
+    if let Some(info) = info {
         return platform::build_llm_client(&info);
     }
-    let api_key = ctx
-        .resolve_secret("ANTHROPIC_API_KEY")
-        .await
-        .unwrap_or_default();
-    LlmClient::with_model(api_key, model_name)
+    tracing::warn!(
+        model = ?model,
+        "builder: no LLM model resolved from project config; LLM calls will fail"
+    );
+    // Return a placeholder — the LLM call will fail with a clear error.
+    LlmClient::with_model("", model.unwrap_or_default())
 }
 
 // ── StartedPipeline (type-erased) ───────────────────────────────────────────

crates/agentic/pipeline/src/platform/mod.rs

Lines changed: 31 additions & 0 deletions
@@ -116,8 +116,39 @@ pub async fn resolve_connectors(
 }
 
 /// Build an [`LlmClient`] from a [`ResolvedModelInfo`], dispatching on vendor.
+///
+/// Azure OpenAI models are detected via `azure_deployment_id` / `azure_api_version`
+/// and routed to [`OpenAiCompatProvider`] (Chat Completions) with the correct
+/// deployment URL, bypassing the Responses API used by [`OpenAiProvider`].
 pub fn build_llm_client(info: &ResolvedModelInfo) -> LlmClient {
     let api_key = info.api_key.as_deref().unwrap_or("");
+    if let (Some(deployment_id), Some(api_version), Some(base_url)) = (
+        info.azure_deployment_id.as_deref(),
+        info.azure_api_version.as_deref(),
+        info.base_url.as_deref(),
+    ) {
+        return LlmClient::with_provider(OpenAiCompatProvider::for_azure(
+            api_key,
+            &info.model,
+            base_url,
+            deployment_id,
+            api_version,
+        ));
+    }
+    if info.azure_deployment_id.is_some()
+        && info.azure_api_version.is_some()
+        && info.base_url.is_none()
+    {
+        tracing::warn!(
+            "Azure config has deployment_id and api_version set but no base_url; \
+             falling back to standard OpenAI."
+        );
+    } else if info.azure_deployment_id.is_some() != info.azure_api_version.is_some() {
+        tracing::warn!(
+            "Azure config is incomplete: both azure_deployment_id and azure_api_version must \
+             be set together. Falling back to standard OpenAI."
+        );
+    }
     match &info.vendor {
         LlmVendor::Anthropic => LlmClient::with_model(api_key, &info.model),
         LlmVendor::OpenAi => {

crates/app/src/agentic_wiring/project_ctx.rs

Lines changed: 35 additions & 16 deletions
@@ -605,22 +605,39 @@ async fn resolve_model_impl(
     let model_name = model.model_name().to_string();
     let key_var = model.key_var().map(|s| s.to_string());
 
-    let (vendor, base_url, extra_api_key) = match model {
-        Model::Anthropic { config: m } => (LlmVendor::Anthropic, m.api_url.clone(), None),
-        Model::OpenAI { config: m } => (LlmVendor::OpenAi, m.api_url.clone(), None),
-        Model::Ollama { config: m } => (
-            LlmVendor::OpenAiCompat,
-            Some(m.api_url.clone()),
-            Some(m.api_key.clone()),
-        ),
-        Model::Google { .. } => {
-            tracing::warn!(
-                model = name,
-                "Google/Gemini models are not yet supported in analytics agents"
-            );
-            return None;
-        }
-    };
+    let (vendor, base_url, extra_api_key, azure_deployment_id, azure_api_version) =
+        match model {
+            Model::Anthropic { config: m } => {
+                (LlmVendor::Anthropic, m.api_url.clone(), None, None, None)
+            }
+            Model::OpenAI { config: m } => {
+                let (dep_id, api_ver) = m
+                    .azure
+                    .as_ref()
+                    .map(|a| {
+                        (
+                            Some(a.azure_deployment_id.clone()),
+                            Some(a.azure_api_version.clone()),
+                        )
+                    })
+                    .unwrap_or((None, None));
+                (LlmVendor::OpenAi, m.api_url.clone(), None, dep_id, api_ver)
+            }
+            Model::Ollama { config: m } => (
+                LlmVendor::OpenAiCompat,
+                Some(m.api_url.clone()),
+                Some(m.api_key.clone()),
+                None,
+                None,
+            ),
+            Model::Google { .. } => {
+                tracing::warn!(
+                    model = name,
+                    "Google/Gemini models are not yet supported in analytics agents"
+                );
+                return None;
+            }
+        };
 
     // Resolve api_key via secrets_manager first, env fallback. Ollama
     // carries its key inline via the config — honor that.
@@ -644,6 +661,8 @@
             api_key,
             base_url,
             is_explicit_ref,
+            azure_deployment_id,
+            azure_api_version,
         })
     }
     Err(e) => {
