Commit 7f8f845

Authored by devin-ai-integration[bot] and cognition-team
feat(chat): reorder harness markers and split compaction buckets (#222)
## Summary

Reshapes the user-turn prompt wrapper and thread-background rendering so Claude Sonnet and GPT-5 treat the current instruction as authoritative and prior thread context as read-only reference material. Addresses the failure mode tracked in #221 and `getsentry/junior-prod#35`, where Junior drifts onto a narrowed-but-superseded ask from earlier in a thread.

Changes in `packages/junior/src/chat/respond-helpers.ts` (`buildUserTurnText`):

- Order (top → bottom): `<thread-background>`, `<session-context>`, `<turn-context>`, `<current-instruction priority="highest">` — `<current-instruction>` is always the final block, matching Anthropic's long-context guidance to place the active query last.
- Drops legacy `<current-message>` / `<thread-conversation-context>` wrappers.
- No explanatory prose inside markers — tag names carry the signal.

Changes in `packages/junior/src/chat/services/conversation-memory.ts`:

- `buildConversationContext` wraps each compaction in `<compaction index=… covered_messages=… created_at=…>` and each transcript entry in `<message index=… ts=… role=… author=… slack_ts=…>`, so each prior item is an individually addressable reference instead of a flat blob.
- `summarizeConversationChunk` prompt now produces three fixed sections — `<active-asks>`, `<superseded-or-completed-asks>`, `<facts>` — so stale or already-acted-on asks stop reading as live constraints after compaction.

Rationale and authoritative prior art (Anthropic long-context guide, OpenAI GPT-5 prompting guide, OpenAI Model Spec chain-of-command) are cited in #221.

## Review & Testing Checklist for Human

- [ ] Sanity-check the new `buildUserTurnText` output shape against a real thread turn (e.g. local dev or an eval snapshot) and confirm the final tag emitted is `</current-instruction>` and `<thread-background>` precedes it.
- [ ] Spot-check one compacted conversation in a real thread to confirm the summarizer is producing the three-bucket XML (active / superseded / facts) rather than a free-form paragraph. Because the summarizer is model-generated, the prompt change only shapes output — run against the production fast model to verify it complies.
- [ ] Decide whether this should be gated behind an eval sweep on both Sonnet and GPT-5 gateway models before relying on the new marker shape for production traffic. This PR does not add such an eval.

### Notes

- Intentionally preserved the `<thread-transcript>` / `<thread-compactions>` marker names; routing fixtures in `tests/unit/routing/subscribed-decision.test.ts` still reference them.
- No runtime behavior change beyond the emitted prompt text; no new dependencies, no schema changes. Compaction storage format (`summary: string`) is unchanged — only the prompt that generates it is updated.
- Pre-existing unit-test failure `tests/unit/services/turn-checkpoint.test.ts > reuses the latest stored transcript…` reproduces on `main` (requires `REDIS_URL`) and is unrelated to this PR.
- Follow-up candidates (not in this PR): add an eval that exercises narrow-then-broaden instruction drift across a compacted thread; consider also marking the assistant's own prior tool calls with an `executed` flag in `<message>` wrappers.

Link to Devin session: https://app.devin.ai/sessions/f46faf27a4354f7dab95abd8dfc50211

Requested by: @dcramer

---------

Co-authored-by: devin-ai-integration[bot] <158243448+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Devin <devin@cognition.ai>
Co-authored-by: Devin <devin-ai-integration[bot]@users.noreply.github.com>
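For illustration, a compaction summary in the new three-bucket shape might look like the following (the asks and facts here are invented, not real model output):

```xml
<active-asks>
- Audit all Slack channels for stale webhooks (original broad ask, never narrowed)
</active-asks>
<superseded-or-completed-asks>
- Check #eng-alerts first → completed in this segment; findings posted back to the thread
</superseded-or-completed-asks>
<facts>
- The workspace uses a single incoming-webhook integration owned by the ops team
</facts>
```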
1 parent 80673e4 commit 7f8f845

4 files changed

Lines changed: 73 additions & 33 deletions

packages/junior/src/chat/respond-helpers.ts

Lines changed: 24 additions & 19 deletions
```diff
@@ -143,7 +143,11 @@ export function summarizeMessageText(text: string): string {
       : normalized;
 }
 
-/** Wrap user input with conversation context and observability metadata XML tags. */
+/**
+ * Wrap the current user turn with self-describing marker blocks: background
+ * first, current instruction last. Ordering matches long-context attention
+ * guidance for Sonnet and GPT-5.
+ */
 export function buildUserTurnText(
   userInput: string,
   conversationContext?: string,
@@ -153,47 +157,48 @@ export function buildUserTurnText(
   },
 ): string {
   const trimmedContext = conversationContext?.trim();
-  const hasSessionContext = Boolean(metadata?.sessionContext?.conversationId);
-  const hasTurnContext = Boolean(metadata?.turnContext?.traceId);
+  const conversationId = metadata?.sessionContext?.conversationId;
+  const traceId = metadata?.turnContext?.traceId;
 
-  if (!trimmedContext && !hasSessionContext && !hasTurnContext) {
+  if (!trimmedContext && !conversationId && !traceId) {
     return userInput;
   }
 
-  const sections: string[] = [
-    "<current-message>",
-    userInput,
-    "</current-message>",
-  ];
+  const sections: string[] = [];
 
   if (trimmedContext) {
     sections.push(
-      "",
-      "<thread-conversation-context>",
-      "Use this context for continuity across prior thread turns.",
+      "<thread-background>",
       trimmedContext,
-      "</thread-conversation-context>",
+      "</thread-background>",
+      "",
     );
   }
 
-  if (metadata?.sessionContext?.conversationId) {
+  if (conversationId) {
     sections.push(
-      "",
       "<session-context>",
-      `- gen_ai.conversation.id: ${metadata.sessionContext.conversationId}`,
+      `- gen_ai.conversation.id: ${conversationId}`,
      "</session-context>",
+      "",
    );
  }
 
-  if (metadata?.turnContext?.traceId) {
+  if (traceId) {
    sections.push(
-      "",
      "<turn-context>",
-      `- trace_id: ${metadata.turnContext.traceId}`,
+      `- trace_id: ${traceId}`,
      "</turn-context>",
+      "",
    );
  }
 
+  sections.push(
+    '<current-instruction priority="highest">',
+    userInput,
+    "</current-instruction>",
+  );
+
   return sections.join("\n");
 }
```
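To make the new ordering concrete, here is a minimal standalone sketch of the wrapper logic from the diff above. The type and function names (`TurnMetadataSketch`, `buildUserTurnTextSketch`) are illustrative stand-ins, not the real module exports:

```typescript
type TurnMetadataSketch = {
  sessionContext?: { conversationId?: string };
  turnContext?: { traceId?: string };
};

// Sketch of the new ordering: optional read-only reference blocks first,
// the authoritative current instruction always emitted last.
function buildUserTurnTextSketch(
  userInput: string,
  conversationContext?: string,
  metadata?: TurnMetadataSketch,
): string {
  const trimmedContext = conversationContext?.trim();
  const conversationId = metadata?.sessionContext?.conversationId;
  const traceId = metadata?.turnContext?.traceId;

  // Bare turns pass through unwrapped.
  if (!trimmedContext && !conversationId && !traceId) {
    return userInput;
  }

  const sections: string[] = [];
  if (trimmedContext) {
    sections.push("<thread-background>", trimmedContext, "</thread-background>", "");
  }
  if (conversationId) {
    sections.push("<session-context>", `- gen_ai.conversation.id: ${conversationId}`, "</session-context>", "");
  }
  if (traceId) {
    sections.push("<turn-context>", `- trace_id: ${traceId}`, "</turn-context>", "");
  }
  // The current instruction is unconditionally the final block.
  sections.push('<current-instruction priority="highest">', userInput, "</current-instruction>");
  return sections.join("\n");
}
```

A reviewer can use a sketch like this to check the two invariants from the checklist: the emitted text ends with `</current-instruction>`, and `<thread-background>` (when present) comes first.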

packages/junior/src/chat/services/conversation-memory.ts

Lines changed: 30 additions & 13 deletions
```diff
@@ -9,6 +9,7 @@ import type {
 } from "@/chat/state/conversation";
 import { toOptionalString } from "@/chat/coerce";
 import { logWarn, setSpanAttributes } from "@/chat/logging";
+import { escapeXml } from "@/chat/xml";
 
 const CONTEXT_COMPACTION_TRIGGER_TOKENS = 9000;
 const CONTEXT_COMPACTION_TARGET_TOKENS = 7000;
@@ -152,6 +153,11 @@ export function markConversationMessage(
   updateConversationStats(conversation);
 }
 
+/**
+ * Render thread history as structured XML. Each compaction and message is
+ * wrapped with index/ts metadata so the model can reference prior items
+ * individually instead of treating the whole block as one flat narrative.
+ */
 export function buildConversationContext(
   conversation: ThreadConversationState,
   options: {
@@ -166,25 +172,31 @@ export function buildConversationContext(
   }
 
   const lines: string[] = [];
+
   if (conversation.compactions.length > 0) {
     lines.push("<thread-compactions>");
     for (const [index, compaction] of conversation.compactions.entries()) {
       lines.push(
-        [
-          `summary_${index + 1}:`,
-          compaction.summary,
-          `covered_messages: ${compaction.coveredMessageIds.length}`,
-          `created_at: ${new Date(compaction.createdAtMs).toISOString()}`,
-        ].join(" "),
+        `  <compaction index="${index + 1}" covered_messages="${compaction.coveredMessageIds.length}" created_at="${new Date(compaction.createdAtMs).toISOString()}">`,
+        compaction.summary,
+        "  </compaction>",
       );
     }
-    lines.push("</thread-compactions>");
-    lines.push("");
+    lines.push("</thread-compactions>", "");
   }
 
   lines.push("<thread-transcript>");
-  for (const message of messages) {
-    lines.push(renderConversationMessageLine(message, conversation));
+  for (const [index, message] of messages.entries()) {
+    const author = escapeXml(message.author?.userName ?? message.role);
+    const ts = new Date(message.createdAtMs).toISOString();
+    const slackTsAttr = message.meta?.slackTs
+      ? ` slack_ts="${escapeXml(message.meta.slackTs)}"`
+      : "";
+    lines.push(
+      `  <message index="${index + 1}" ts="${ts}" role="${message.role}" author="${author}"${slackTsAttr}>`,
+      renderConversationMessageLine(message, conversation),
+      "  </message>",
+    );
   }
   lines.push("</thread-transcript>");
   return lines.join("\n");
@@ -240,9 +252,14 @@ async function summarizeConversationChunk(
       role: "user",
       content: [
         "Summarize the following older Slack thread transcript segment for future assistant turns.",
-        "Keep the summary factual and concise.",
-        "Preserve decisions, commitments, constraints, locations, hiring criteria, and unresolved asks.",
-        "Do not invent details.",
+        "Keep the summary factual and concise. Do not invent details.",
+        "",
+        "Output exactly three XML sections in this order:",
+        "<active-asks> one bullet per outstanding user ask that has not been narrowed, answered, or superseded by a later turn. Omit the section body if none. </active-asks>",
+        "<superseded-or-completed-asks> one bullet per ask that has been rescoped, narrowed, answered, or already acted on in this segment. Include the replacement/outcome inline. Omit the section body if none. </superseded-or-completed-asks>",
+        "<facts> one bullet per durable fact useful regardless of scope: names, ids, URLs, decisions, locations, preferences, constraints that remain true. Omit the section body if none. </facts>",
+        "",
+        "Do not output any text outside the three sections.",
         "",
         transcript,
       ].join("\n"),
```
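The compaction-wrapping behavior can be sketched in isolation. `CompactionSketch` and `renderCompactions` below are illustrative stand-ins for the real conversation-state types and the relevant slice of `buildConversationContext`:

```typescript
// Hypothetical shape standing in for the real compaction record.
type CompactionSketch = {
  summary: string;
  coveredMessageIds: string[];
  createdAtMs: number;
};

// Each compaction becomes an individually addressable XML element with
// index / covered_messages / created_at attributes, instead of a flat blob.
function renderCompactions(compactions: CompactionSketch[]): string {
  const lines: string[] = ["<thread-compactions>"];
  for (const [index, compaction] of compactions.entries()) {
    lines.push(
      `  <compaction index="${index + 1}" covered_messages="${compaction.coveredMessageIds.length}" created_at="${new Date(compaction.createdAtMs).toISOString()}">`,
      compaction.summary,
      "  </compaction>",
    );
  }
  lines.push("</thread-compactions>");
  return lines.join("\n");
}
```

Note that, as in the diff, the summary text itself is emitted verbatim; only author names and Slack timestamps pass through `escapeXml` in the real code.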
Lines changed: 8 additions & 0 deletions
```diff
@@ -0,0 +1,8 @@
+import { describe, expect, it } from "vitest";
+import { buildUserTurnText } from "@/chat/respond-helpers";
+
+describe("buildUserTurnText", () => {
+  it("returns raw input when no context or metadata is provided", () => {
+    expect(buildUserTurnText("hello")).toBe("hello");
+  });
+});
```

packages/junior/tests/unit/services/conversation-memory.test.ts

Lines changed: 11 additions & 1 deletion
```diff
@@ -1,5 +1,8 @@
 import { describe, expect, it } from "vitest";
-import { getThreadTitleSourceMessage } from "@/chat/services/conversation-memory";
+import {
+  buildConversationContext,
+  getThreadTitleSourceMessage,
+} from "@/chat/services/conversation-memory";
 import { coerceThreadConversationState } from "@/chat/state/conversation";
 
 describe("conversation memory title source", () => {
@@ -58,3 +61,10 @@ describe("conversation memory title source", () => {
     );
   });
 });
+
+describe("buildConversationContext", () => {
+  it("returns undefined for an empty conversation", () => {
+    const conversation = coerceThreadConversationState({});
+    expect(buildConversationContext(conversation)).toBeUndefined();
+  });
+});
```
