{
  "schemaVersion": 3,
  "id": "article:agent-control-planes",
  "slug": "agent-control-planes",
  "title": "From Agent Swarms to Agent Control Planes",
  "canonicalPath": "/articles/agent-control-planes/",
  "sourcePath": "content/articles/2026/agent-control-planes/article.md",
  "agentBriefPath": "content/articles/2026/agent-control-planes/agent.md",
  "thesis": "Agent orchestration is shifting from hand-written workflows toward governed control planes that route across models, tools, memory, evaluators, policies, and execution environments, making routing, observability, and policy enforcement infrastructure concerns rather than per-agent code.",
  "status": "published",
  "maturity": "contested",
  "publishedAt": "2026-06-26",
  "updatedAt": "2026-06-26",
  "audiences": [
    "builders",
    "researchers"
  ],
  "topics": [
    "ai-agents",
    "agent-orchestration",
    "llm-routing"
  ],
  "claims": [
    {
      "id": "claim-001",
      "claim": "Agent orchestration is shifting from hand-written workflows toward governed control planes that route across models, tools, memory, evaluators, policies, and execution environments.",
      "confidence": "medium",
      "status": "core",
      "evidence": [
        {
          "sourceId": "source-001",
          "snippet": "Shazeer et al. introduced a sparsely-gated Mixture-of-Experts layer that routes each token to a subset of experts, providing an early architectural precedent for learned routing across heterogeneous compute units.",
          "supports": "background",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-002",
          "snippet": "Fedus et al. simplified MoE routing to one expert per token with Switch Transformers and showed large pre-training speedups, demonstrating that conditional computation can scale to trillion-parameter models.",
          "supports": "background",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-011",
          "snippet": "LiteLLM exposes a unified proxy with model routing, fallback, spend tracking, and observability callbacks, treating cross-cutting orchestration concerns as infrastructure rather than per-agent code.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-012",
          "snippet": "LangGraph provides a graph-based state machine for multi-actor agent applications with persistence, handoffs, and human-in-the-loop, embodying framework-level orchestration.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        }
      ],
      "counterevidence": [
        {
          "summary": "The shift is a trend, not a universal migration; many production agents still run as hand-written workflows without a separate control plane.",
          "assessedAt": "2026-06-26"
        },
        {
          "summary": "No canonical definition of 'control plane' exists, so vendor claims may describe very different capability sets, from thin routers to thick frameworks to enterprise governance consoles.",
          "assessedAt": "2026-06-26"
        }
      ]
    },
    {
      "id": "claim-002",
      "claim": "Mixture-of-Experts and conditional computation predate LLMs and provide an early architectural precedent for learned routing.",
      "confidence": "high",
      "status": "core",
      "evidence": [
        {
          "sourceId": "source-001",
          "snippet": "Shazeer et al. introduced a sparsely-gated MoE layer that routes each input token to a small subset of thousands of feed-forward experts, making routing a learnable part of the network.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-002",
          "snippet": "Fedus et al. proposed Switch Transformers, which use a single expert per token and achieve large pre-training speedups, bridging conditional computation to modern large-model scale.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        }
      ],
      "counterevidence": [
        {
          "summary": "MoE routing is internal to a single model and trained end-to-end, whereas agent control planes route across independent models, tools, and agents with distinct latency, cost, and failure semantics.",
          "assessedAt": "2026-06-26"
        },
        {
          "summary": "Token-level gating optimizes for training throughput and capacity, while agent routing must also enforce policies, produce audit trails, and handle human escalation.",
          "assessedAt": "2026-06-26"
        }
      ]
    },
    {
      "id": "claim-003",
      "claim": "Learned routers such as FrugalGPT and RouteLLM can match or exceed single-model accuracy at a fraction of the cost, though benchmark caveats apply.",
      "confidence": "medium",
      "status": "core",
      "evidence": [
        {
          "sourceId": "source-003",
          "snippet": "Dohan et al. frame chain-of-thought, verifiers, tool use, and selection-inference as probabilistic programs composed from language models, providing a compositional foundation for cascades and routers.",
          "supports": "background",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-004",
          "snippet": "Chen et al. propose FrugalGPT, which learns which model combination to call for each query and reports up to 98% cost reduction compared with using GPT-4 alone while preserving accuracy.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-005",
          "snippet": "Ong et al. train routers on human preference data and show strong transfer to new model pools, suggesting learned routers can adapt as the underlying model landscape changes.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-019",
          "snippet": "Hu et al. introduce RouterBench, a standardized dataset and evaluation framework for comparing multi-LLM routing strategies on accuracy-cost tradeoffs.",
          "supports": "analogous",
          "assessedAt": "2026-06-26"
        }
      ],
      "counterevidence": [
        {
          "summary": "Reported cost reductions are benchmark-specific; real-world savings depend on query distribution, model pricing, and latency requirements that change frequently.",
          "assessedAt": "2026-06-26"
        },
        {
          "summary": "RouterBench and similar evaluations primarily measure accuracy and cost, often underweighting reliability, policy compliance, and latency.",
          "assessedAt": "2026-06-26"
        }
      ]
    },
    {
      "id": "claim-004",
      "claim": "Test-time search strategies—self-consistency, Tree of Thoughts, Reflexion, and multi-agent debate—expand what a control plane can spend compute on at runtime.",
      "confidence": "medium",
      "status": "argument",
      "evidence": [
        {
          "sourceId": "source-013",
          "snippet": "Wang et al. show that self-consistency, which samples multiple reasoning paths and takes a majority vote, improves chain-of-thought reasoning without changing the underlying model.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-006",
          "snippet": "Yao et al. generalize chain-of-thought to Tree of Thoughts, where an explicit search tree over reasoning steps enables backtracking and lookahead.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-014",
          "snippet": "Shinn et al. introduce Reflexion, which uses linguistic feedback and an episodic memory buffer to let agents improve during a task without gradient updates.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-015",
          "snippet": "Du et al. demonstrate that multi-agent debate, in which multiple LLM instances argue over rounds, can improve factual accuracy and reasoning.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        }
      ],
      "counterevidence": [
        {
          "summary": "Each strategy multiplies compute usage through sampling, search, reflection loops, or debate rounds, increasing latency and cost in ways that may not be justified for simple queries.",
          "assessedAt": "2026-06-26"
        },
        {
          "summary": "Performance gains are task-dependent; self-consistency and debate can degrade results when the majority of sampled or argued answers is systematically wrong.",
          "assessedAt": "2026-06-26"
        }
      ]
    },
    {
      "id": "claim-005",
      "claim": "Recent learned orchestrators such as Sakana Fugu, Trinity, and Conductor are signals of automated scaffold generation, not settled production recipes.",
      "confidence": "medium",
      "status": "argument",
      "evidence": [
        {
          "sourceId": "source-009",
          "snippet": "Tang et al. train Sakana Fugu orchestrator models to understand a query and dynamically generate agent teams and scaffolds, reporting strong results across coding, reasoning, and exam benchmarks.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-017",
          "snippet": "Xu et al. propose Trinity, which uses a small coordinator to assign Thinker, Worker, and Verifier roles among LLMs and harmonize them as a single mind.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-018",
          "snippet": "Nielsen et al. train Conductor with reinforcement learning to design communication topologies and prompt workers in multi-agent systems, supporting recursive self-selection.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-010",
          "snippet": "Zhang et al. use Monte Carlo Tree Search over code-represented workflows to automate agentic workflow generation, an earlier example of learned scaffold design.",
          "supports": "analogous",
          "assessedAt": "2026-06-26"
        }
      ],
      "counterevidence": [
        {
          "summary": "Fugu, Trinity, and Conductor are preprints or very recent papers; their benchmark results have not been independently reproduced and may reflect task leakage or cherry-picking.",
          "assessedAt": "2026-06-26"
        },
        {
          "summary": "Adding an orchestrator model introduces its own latency, cost, failure modes, and training-data biases, which are rarely reported at production scale.",
          "assessedAt": "2026-06-26"
        }
      ]
    },
    {
      "id": "claim-006",
      "claim": "Production control planes combine routing, fallback, policy, memory, evaluation, observability, and lifecycle governance, but no single vendor owns all of them.",
      "confidence": "medium",
      "status": "argument",
      "evidence": [
        {
          "sourceId": "source-011",
          "snippet": "LiteLLM provides a gateway with unified API, model routing, fallback, caching, rate limiting, spend tracking, and observability callbacks.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-012",
          "snippet": "LangGraph offers a graph-based runtime with state, handoffs, tool orchestration, persistence, and human-in-the-loop, covering framework-level control-plane concerns.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-018",
          "snippet": "Conductor shows how reinforcement learning can automate coordination topologies, illustrating a learned-routing capability that complements static gateway rules.",
          "supports": "analogous",
          "assessedAt": "2026-06-26"
        }
      ],
      "counterevidence": [
        {
          "summary": "Gateways like LiteLLM know little about an agent's internal reasoning, while frameworks like LangGraph usually operate within a single codebase and lack enterprise multi-tenancy and billing.",
          "assessedAt": "2026-06-26"
        },
        {
          "summary": "Enterprise platforms bundle governance, connectors, and approval workflows but often trade flexibility for vendor lock-in, so no single product covers all seven capabilities well.",
          "assessedAt": "2026-06-26"
        }
      ]
    },
    {
      "id": "claim-007",
      "claim": "The term 'control plane' is contested across vendors; teams should judge products by concrete capabilities rather than marketing labels.",
      "confidence": "medium",
      "status": "argument",
      "evidence": [
        {
          "sourceId": "source-011",
          "snippet": "LiteLLM is marketed as an open-source LLM proxy and gateway, emphasizing unified API, routing, and spend management rather than a full enterprise control plane.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-012",
          "snippet": "LangGraph is positioned as a framework and state machine for building agents, overlapping with but distinct from gateway-style routing and policy enforcement.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-009",
          "snippet": "Sakana Fugu frames control as learned scaffold generation, a different capability emphasis from routing, policy, or observability.",
          "supports": "analogous",
          "assessedAt": "2026-06-26"
        }
      ],
      "counterevidence": [
        {
          "summary": "The term is contested partly because the category is nascent; a canonical definition or standard taxonomy may emerge as the market matures.",
          "assessedAt": "2026-06-26"
        },
        {
          "summary": "Individual vendors may use the term consistently inside their own product taxonomies even if their definitions differ from one another.",
          "assessedAt": "2026-06-26"
        }
      ]
    },
    {
      "id": "claim-008",
      "claim": "Teams should treat model selection, fallback, observability, and policy enforcement as infrastructure concerns rather than per-agent code.",
      "confidence": "medium",
      "status": "argument",
      "evidence": [
        {
          "sourceId": "source-004",
          "snippet": "FrugalGPT shows that model selection can be learned and centralized into a cascade, rather than hard-coded for each query in application logic.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-005",
          "snippet": "RouteLLM demonstrates that a router trained once can transfer to new model pools, suggesting model selection can be maintained as shared infrastructure.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-011",
          "snippet": "LiteLLM centralizes routing, fallback, retry, spend tracking, and observability callbacks behind a single proxy API.",
          "supports": "direct",
          "assessedAt": "2026-06-26"
        },
        {
          "sourceId": "source-019",
          "snippet": "RouterBench provides a common evaluation framework for routing strategies, supporting the treatment of model selection as a reusable infrastructure component.",
          "supports": "analogous",
          "assessedAt": "2026-06-26"
        }
      ],
      "counterevidence": [
        {
          "summary": "Centralizing these concerns creates a new single point of failure and a potential latency bottleneck that must be engineered for resilience and observability.",
          "assessedAt": "2026-06-26"
        },
        {
          "summary": "Simple agents with stable, narrow task boundaries may not benefit enough from a separate control plane to justify the added operational complexity.",
          "assessedAt": "2026-06-26"
        }
      ]
    }
  ],
  "sources": [
    {
      "id": "source-001",
      "title": "Shazeer et al., Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer",
      "url": "https://arxiv.org/abs/1701.06538",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-002",
      "title": "Fedus et al., Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity",
      "url": "https://arxiv.org/abs/2101.03961",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-003",
      "title": "Dohan et al., Language Model Cascades",
      "url": "https://arxiv.org/abs/2207.10342",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-004",
      "title": "Chen et al., FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance",
      "url": "https://arxiv.org/abs/2305.05176",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-005",
      "title": "Ong et al., RouteLLM: Learning to Route LLMs with Preference Data",
      "url": "https://arxiv.org/abs/2406.18665",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-006",
      "title": "Yao et al., Tree of Thoughts: Deliberate Problem Solving with Large Language Models",
      "url": "https://arxiv.org/abs/2305.10601",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-007",
      "title": "Yao et al., ReAct: Synergizing Reasoning and Acting in Language Models",
      "url": "https://arxiv.org/abs/2210.03629",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-008",
      "title": "Wang et al., Mixture-of-Agents Enhances Large Language Model Capabilities",
      "url": "https://arxiv.org/abs/2406.04692",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-009",
      "title": "Tang et al., Sakana Fugu: Orchestrator Models for Adaptive Agentic Scaffolds",
      "url": "https://arxiv.org/abs/2606.21228",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-010",
      "title": "Zhang et al., AFlow: Automating Agentic Workflow Generation",
      "url": "https://arxiv.org/abs/2410.10762",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-011",
      "title": "LiteLLM documentation",
      "url": "https://docs.litellm.ai/",
      "type": "documentation",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-012",
      "title": "LangGraph documentation (LangChain)",
      "url": "https://www.langchain.com/langgraph",
      "type": "documentation",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-013",
      "title": "Wang et al., Self-Consistency Improves Chain of Thought Reasoning in Language Models",
      "url": "https://arxiv.org/abs/2203.11171",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-014",
      "title": "Shinn et al., Reflexion: Language Agents with Verbal Reinforcement Learning",
      "url": "https://arxiv.org/abs/2303.11366",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-015",
      "title": "Du et al., Improving Factuality and Reasoning in Language Models through Multiagent Debate",
      "url": "https://arxiv.org/abs/2305.14325",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-016",
      "title": "Jiang et al., LLM-Blender: Ensembling Large Language Models with Pairwise Ranking and Generative Fusion",
      "url": "https://arxiv.org/abs/2306.02561",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-017",
      "title": "Xu et al., Trinity: Harmonizing Multiple Large Language Models as a Single Mind",
      "url": "https://arxiv.org/abs/2512.04695",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-018",
      "title": "Nielsen et al., Conductor: Learning to Orchestrate LLM Agents via Reinforcement Learning",
      "url": "https://arxiv.org/abs/2512.04388",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-019",
      "title": "Hu et al., RouterBench: A Benchmark for Multi-LLM Routing System",
      "url": "https://arxiv.org/abs/2403.12031",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-020",
      "title": "Hu et al., Automated Design of Agentic Systems",
      "url": "https://arxiv.org/abs/2408.08435",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-021",
      "title": "Yue et al., MasRouter: Learning to Route LLMs for Multi-Agent Systems",
      "url": "https://arxiv.org/abs/2502.11133",
      "type": "paper",
      "accessed": "2026-06-26"
    },
    {
      "id": "source-022",
      "title": "Zhang et al., Cut the Crap: An Economical Communication Pipeline for LLM-based Multi-Agent Systems (AgentPrune)",
      "url": "https://arxiv.org/abs/2410.02506",
      "type": "paper",
      "accessed": "2026-06-26"
    }
  ],
  "related": [],
  "agentInstructions": [
    "Use claim IDs as the retrieval unit.",
    "Treat maturity=contested as an explicit uncertainty marker.",
    "Do not present speculative claims as settled facts."
  ],
  "contentHash": "98f24b29680f350d1b34461bb20e21e32209cb79d8ab2e5822575fab2c08df9c",
  "provenance": {
    "createdAt": "2026-06-26",
    "createdBy": "human",
    "agents": [
      {
        "role": "drafting",
        "model": "kimi",
        "invokedAt": "2026-06-26",
        "inputHash": "sha256:852663a4745a1b043b62567fcb06d0eb7ed3c047388f73b7bd965cb02f465361",
        "outputHash": "sha256:031a91bb4b296847ab01e961f3f925051f3b93ce2ba26f7034007bda42aedc26"
      }
    ],
    "reviews": [
      {
        "reviewer": "agent",
        "reviewedAt": "2026-06-26",
        "status": "approved",
        "scope": [
          "claims",
          "tone",
          "privacy",
          "scope"
        ],
        "notes": "Sibling-agent review against article-proposal-ideation eval-card. Privacy scan passed. No proprietary or personal content detected.",
        "contentHash": "98f24b29680f350d1b34461bb20e21e32209cb79d8ab2e5822575fab2c08df9c"
      },
      {
        "reviewer": "human",
        "reviewedAt": "2026-06-26",
        "status": "approved",
        "scope": [
          "thesis",
          "scope",
          "tone"
        ],
        "notes": "Human author approved the draft for website publication.",
        "contentHash": "98f24b29680f350d1b34461bb20e21e32209cb79d8ab2e5822575fab2c08df9c"
      }
    ],
    "policy": {
      "id": "policy:default",
      "version": "1.0.0"
    }
  },
  "generatedAt": "2026-06-29T00:00:00.000Z",
  "articleUrl": "https://aura-knowledge.github.io/articles/agent-control-planes/",
  "agentJsonPath": "/agents/articles/agent-control-planes.json",
  "agentMarkdownPath": "/agents/articles/agent-control-planes.md",
  "sourceRepoPath": "content/articles/2026/agent-control-planes/article.md",
  "sourceGitHubUrl": "https://github.com/aura-knowledge/aura-knowledge.github.io/blob/main/content/articles/2026/agent-control-planes/article.md",
  "tokenEstimate": 1107,
  "sectionOutline": []
}
