refactor: reorganize OpenClaw services and enhance session management

This commit is contained in:
Abhimanyu Saharan
2026-02-10 14:50:27 +05:30
parent 6f070df74b
commit 82425edd69
24 changed files with 4454 additions and 3380 deletions

View File

@@ -1,159 +0,0 @@
"""Helpers for ensuring each board has a provisioned lead agent."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any
from sqlmodel import col, select
from app.core.agent_tokens import generate_agent_token, hash_agent_token
from app.core.time import utcnow
from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig
from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, send_message
from app.models.agents import Agent
from app.services.agent_provisioning import (
DEFAULT_HEARTBEAT_CONFIG,
AgentProvisionRequest,
ProvisionOptions,
provision_agent,
)
if TYPE_CHECKING:
from sqlmodel.ext.asyncio.session import AsyncSession
from app.models.boards import Board
from app.models.gateways import Gateway
from app.models.users import User
def lead_session_key(board: Board) -> str:
"""Return the deterministic main session key for a board lead agent."""
return f"agent:lead-{board.id}:main"
def lead_agent_name(_: Board) -> str:
"""Return the default display name for board lead agents."""
return "Lead Agent"
@dataclass(frozen=True, slots=True)
class LeadAgentOptions:
"""Optional overrides for board-lead provisioning behavior."""
agent_name: str | None = None
identity_profile: dict[str, str] | None = None
action: str = "provision"
@dataclass(frozen=True, slots=True)
class LeadAgentRequest:
"""Inputs required to ensure or provision a board lead agent."""
board: Board
gateway: Gateway
config: GatewayClientConfig
user: User | None
options: LeadAgentOptions = field(default_factory=LeadAgentOptions)
async def ensure_board_lead_agent(
session: AsyncSession,
*,
request: LeadAgentRequest,
) -> tuple[Agent, bool]:
"""Ensure a board has a lead agent; return `(agent, created)`."""
board = request.board
config_options = request.options
existing = (
await session.exec(
select(Agent)
.where(Agent.board_id == board.id)
.where(col(Agent.is_board_lead).is_(True)),
)
).first()
if existing:
desired_name = config_options.agent_name or lead_agent_name(board)
changed = False
if existing.name != desired_name:
existing.name = desired_name
changed = True
if existing.gateway_id != request.gateway.id:
existing.gateway_id = request.gateway.id
changed = True
desired_session_key = lead_session_key(board)
if not existing.openclaw_session_id:
existing.openclaw_session_id = desired_session_key
changed = True
if changed:
existing.updated_at = utcnow()
session.add(existing)
await session.commit()
await session.refresh(existing)
return existing, False
merged_identity_profile: dict[str, Any] = {
"role": "Board Lead",
"communication_style": "direct, concise, practical",
"emoji": ":gear:",
}
if config_options.identity_profile:
merged_identity_profile.update(
{
key: value.strip()
for key, value in config_options.identity_profile.items()
if value.strip()
},
)
agent = Agent(
name=config_options.agent_name or lead_agent_name(board),
status="provisioning",
board_id=board.id,
gateway_id=request.gateway.id,
is_board_lead=True,
heartbeat_config=DEFAULT_HEARTBEAT_CONFIG.copy(),
identity_profile=merged_identity_profile,
openclaw_session_id=lead_session_key(board),
provision_requested_at=utcnow(),
provision_action=config_options.action,
)
raw_token = generate_agent_token()
agent.agent_token_hash = hash_agent_token(raw_token)
session.add(agent)
await session.commit()
await session.refresh(agent)
try:
await provision_agent(
agent,
AgentProvisionRequest(
board=board,
gateway=request.gateway,
auth_token=raw_token,
user=request.user,
options=ProvisionOptions(action=config_options.action),
),
)
if agent.openclaw_session_id:
await ensure_session(
agent.openclaw_session_id,
config=request.config,
label=agent.name,
)
await send_message(
(
f"Hello {agent.name}. Your workspace has been provisioned.\n\n"
"Start the agent, run BOOT.md, and if BOOTSTRAP.md exists run "
"it once "
"then delete it. Begin heartbeats after startup."
),
session_key=agent.openclaw_session_id,
config=request.config,
deliver=True,
)
except OpenClawGatewayError:
# Best-effort provisioning. The board/agent rows should still exist.
pass
return agent, True

View File

@@ -2,22 +2,20 @@
from __future__ import annotations
from datetime import timedelta
from typing import TYPE_CHECKING
from sqlalchemy import case, func
from sqlmodel import col, select
from app.core.time import utcnow
from app.models.agents import Agent
from app.models.approvals import Approval
from app.models.board_memory import BoardMemory
from app.models.tasks import Task
from app.schemas.agents import AgentRead
from app.schemas.approvals import ApprovalRead
from app.schemas.board_memory import BoardMemoryRead
from app.schemas.boards import BoardRead
from app.schemas.view_models import BoardSnapshot, TaskCardRead
from app.services.openclaw import AgentLifecycleService
from app.services.task_dependencies import (
blocked_by_dependency_ids,
dependency_ids_by_task_id,
@@ -31,31 +29,6 @@ if TYPE_CHECKING:
from app.models.boards import Board
OFFLINE_AFTER = timedelta(minutes=10)
def _computed_agent_status(agent: Agent) -> str:
now = utcnow()
if agent.status in {"deleting", "updating"}:
return agent.status
if agent.last_seen_at is None:
return "provisioning"
if now - agent.last_seen_at > OFFLINE_AFTER:
return "offline"
return agent.status
def _agent_to_read(agent: Agent) -> AgentRead:
model = AgentRead.model_validate(agent, from_attributes=True)
computed_status = _computed_agent_status(agent)
is_gateway_main = agent.gateway_id is not None and agent.board_id is None
return model.model_copy(
update={
"status": computed_status,
"is_gateway_main": is_gateway_main,
},
)
def _memory_to_read(memory: BoardMemory) -> BoardMemoryRead:
return BoardMemoryRead.model_validate(memory, from_attributes=True)
@@ -125,7 +98,10 @@ async def build_board_snapshot(session: AsyncSession, board: Board) -> BoardSnap
.order_by(col(Agent.created_at).desc())
.all(session)
)
agent_reads = [_agent_to_read(agent) for agent in agents]
agent_reads = [
AgentLifecycleService.to_agent_read(AgentLifecycleService.with_computed_status(agent))
for agent in agents
]
agent_name_by_id = {agent.id: agent.name for agent in agents}
pending_approvals_count = int(

View File

@@ -1,31 +0,0 @@
"""Helpers for dedicated gateway-scoped agent identity/session keys."""
from __future__ import annotations
from uuid import UUID
from app.models.gateways import Gateway
_GATEWAY_AGENT_PREFIX = "agent:gateway-"
_GATEWAY_AGENT_SUFFIX = ":main"
_GATEWAY_OPENCLAW_AGENT_PREFIX = "mc-gateway-"
def gateway_agent_session_key_for_id(gateway_id: UUID) -> str:
"""Return the dedicated Mission Control gateway-agent session key for an id."""
return f"{_GATEWAY_AGENT_PREFIX}{gateway_id}{_GATEWAY_AGENT_SUFFIX}"
def gateway_agent_session_key(gateway: Gateway) -> str:
"""Return the dedicated Mission Control gateway-agent session key."""
return gateway_agent_session_key_for_id(gateway.id)
def gateway_openclaw_agent_id_for_id(gateway_id: UUID) -> str:
"""Return the dedicated OpenClaw config `agentId` for a gateway agent."""
return f"{_GATEWAY_OPENCLAW_AGENT_PREFIX}{gateway_id}"
def gateway_openclaw_agent_id(gateway: Gateway) -> str:
"""Return the dedicated OpenClaw config `agentId` for a gateway agent."""
return gateway_openclaw_agent_id_for_id(gateway.id)

View File

@@ -0,0 +1,7 @@
"""OpenClaw lifecycle services package."""
from .constants import * # noqa: F401,F403
from .exceptions import * # noqa: F401,F403
from .provisioning import * # noqa: F401,F403
from .services import * # noqa: F401,F403
from .shared import * # noqa: F401,F403

View File

@@ -0,0 +1,120 @@
"""Shared constants for lifecycle orchestration services."""
from __future__ import annotations
import random
import re
from datetime import timedelta
from typing import Any
_GATEWAY_AGENT_PREFIX = "agent:gateway-"
_GATEWAY_AGENT_SUFFIX = ":main"
_GATEWAY_OPENCLAW_AGENT_PREFIX = "mc-gateway-"
DEFAULT_HEARTBEAT_CONFIG: dict[str, Any] = {
"every": "10m",
"target": "none",
"includeReasoning": False,
}
OFFLINE_AFTER = timedelta(minutes=10)
AGENT_SESSION_PREFIX = "agent"
DEFAULT_CHANNEL_HEARTBEAT_VISIBILITY: dict[str, bool] = {
# Suppress routine HEARTBEAT_OK delivery by default.
"showOk": False,
"showAlerts": True,
"useIndicator": True,
}
DEFAULT_IDENTITY_PROFILE = {
"role": "Generalist",
"communication_style": "direct, concise, practical",
"emoji": ":gear:",
}
IDENTITY_PROFILE_FIELDS = {
"role": "identity_role",
"communication_style": "identity_communication_style",
"emoji": "identity_emoji",
}
EXTRA_IDENTITY_PROFILE_FIELDS = {
"autonomy_level": "identity_autonomy_level",
"verbosity": "identity_verbosity",
"output_format": "identity_output_format",
"update_cadence": "identity_update_cadence",
# Per-agent charter (optional).
# Used to give agents a "purpose in life" and a distinct vibe.
"purpose": "identity_purpose",
"personality": "identity_personality",
"custom_instructions": "identity_custom_instructions",
}
DEFAULT_GATEWAY_FILES = frozenset(
{
"AGENTS.md",
"SOUL.md",
"TASK_SOUL.md",
"SELF.md",
"AUTONOMY.md",
"TOOLS.md",
"IDENTITY.md",
"USER.md",
"HEARTBEAT.md",
"BOOT.md",
"BOOTSTRAP.md",
"MEMORY.md",
},
)
# These files are intended to evolve within the agent workspace.
# Provision them if missing, but avoid overwriting existing content during updates.
#
# Examples:
# - SELF.md: evolving identity/preferences
# - USER.md: human-provided context + lead intake notes
# - MEMORY.md: curated long-term memory (consolidated)
PRESERVE_AGENT_EDITABLE_FILES = frozenset({"SELF.md", "USER.md", "MEMORY.md", "TASK_SOUL.md"})
HEARTBEAT_LEAD_TEMPLATE = "HEARTBEAT_LEAD.md"
HEARTBEAT_AGENT_TEMPLATE = "HEARTBEAT_AGENT.md"
SESSION_KEY_PARTS_MIN = 2
_SESSION_KEY_PARTS_MIN = SESSION_KEY_PARTS_MIN
MAIN_TEMPLATE_MAP = {
"AGENTS.md": "MAIN_AGENTS.md",
"HEARTBEAT.md": "MAIN_HEARTBEAT.md",
"USER.md": "MAIN_USER.md",
"BOOT.md": "MAIN_BOOT.md",
"TOOLS.md": "MAIN_TOOLS.md",
}
_TOOLS_KV_RE = re.compile(r"^(?P<key>[A-Z0-9_]+)=(?P<value>.*)$")
_NON_TRANSIENT_GATEWAY_ERROR_MARKERS = ("unsupported file",)
_TRANSIENT_GATEWAY_ERROR_MARKERS = (
"connect call failed",
"connection refused",
"errno 111",
"econnrefused",
"did not receive a valid http response",
"no route to host",
"network is unreachable",
"host is down",
"name or service not known",
"received 1012",
"service restart",
"http 503",
"http 502",
"http 504",
"temporar",
"timeout",
"timed out",
"connection closed",
"connection reset",
)
_COORDINATION_GATEWAY_TIMEOUT_S = 45.0
_COORDINATION_GATEWAY_BASE_DELAY_S = 0.5
_COORDINATION_GATEWAY_MAX_DELAY_S = 5.0
_SECURE_RANDOM = random.SystemRandom()

View File

@@ -0,0 +1,90 @@
"""OpenClaw-specific exception definitions and mapping helpers."""
from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
from fastapi import HTTPException, status
class GatewayOperation(str, Enum):
"""Typed gateway operations used for consistent HTTP error mapping."""
NUDGE_AGENT = "nudge_agent"
SOUL_READ = "soul_read"
SOUL_WRITE = "soul_write"
ASK_USER_DISPATCH = "ask_user_dispatch"
LEAD_MESSAGE_DISPATCH = "lead_message_dispatch"
LEAD_BROADCAST_DISPATCH = "lead_broadcast_dispatch"
ONBOARDING_START_DISPATCH = "onboarding_start_dispatch"
ONBOARDING_ANSWER_DISPATCH = "onboarding_answer_dispatch"
@dataclass(frozen=True, slots=True)
class GatewayErrorPolicy:
"""HTTP policy for mapping gateway operation failures."""
status_code: int
detail_template: str
_GATEWAY_ERROR_POLICIES: dict[GatewayOperation, GatewayErrorPolicy] = {
GatewayOperation.NUDGE_AGENT: GatewayErrorPolicy(
status_code=status.HTTP_502_BAD_GATEWAY,
detail_template="Gateway nudge failed: {error}",
),
GatewayOperation.SOUL_READ: GatewayErrorPolicy(
status_code=status.HTTP_502_BAD_GATEWAY,
detail_template="Gateway SOUL read failed: {error}",
),
GatewayOperation.SOUL_WRITE: GatewayErrorPolicy(
status_code=status.HTTP_502_BAD_GATEWAY,
detail_template="Gateway SOUL update failed: {error}",
),
GatewayOperation.ASK_USER_DISPATCH: GatewayErrorPolicy(
status_code=status.HTTP_502_BAD_GATEWAY,
detail_template="Gateway ask-user dispatch failed: {error}",
),
GatewayOperation.LEAD_MESSAGE_DISPATCH: GatewayErrorPolicy(
status_code=status.HTTP_502_BAD_GATEWAY,
detail_template="Gateway lead message dispatch failed: {error}",
),
GatewayOperation.LEAD_BROADCAST_DISPATCH: GatewayErrorPolicy(
status_code=status.HTTP_502_BAD_GATEWAY,
detail_template="Gateway lead broadcast dispatch failed: {error}",
),
GatewayOperation.ONBOARDING_START_DISPATCH: GatewayErrorPolicy(
status_code=status.HTTP_502_BAD_GATEWAY,
detail_template="Gateway onboarding start dispatch failed: {error}",
),
GatewayOperation.ONBOARDING_ANSWER_DISPATCH: GatewayErrorPolicy(
status_code=status.HTTP_502_BAD_GATEWAY,
detail_template="Gateway onboarding answer dispatch failed: {error}",
),
}
def map_gateway_error_to_http_exception(
operation: GatewayOperation,
exc: Exception,
) -> HTTPException:
"""Map a gateway failure into a typed HTTP exception."""
policy = _GATEWAY_ERROR_POLICIES[operation]
return HTTPException(
status_code=policy.status_code,
detail=policy.detail_template.format(error=str(exc)),
)
def map_gateway_error_message(
operation: GatewayOperation,
exc: Exception,
) -> str:
"""Map a gateway failure into a stable error message string."""
if isinstance(exc, HTTPException):
detail = exc.detail
if isinstance(detail, str):
return detail
return str(detail)
return map_gateway_error_to_http_exception(operation, exc).detail

View File

@@ -1,106 +1,65 @@
"""Gateway-facing agent provisioning and cleanup helpers."""
"""Provisioning, template sync, and board-lead lifecycle orchestration."""
from __future__ import annotations
from abc import ABC, abstractmethod
import asyncio
import hashlib
import json
import re
from abc import ABC, abstractmethod
from collections.abc import Awaitable, Callable
from contextlib import suppress
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any
from uuid import uuid4
from typing import TYPE_CHECKING, Any, TypeVar
from uuid import UUID, uuid4
from jinja2 import Environment, FileSystemLoader, StrictUndefined, select_autoescape
from sqlalchemy import func
from sqlmodel import col, select
from app.core.agent_tokens import generate_agent_token, hash_agent_token, verify_agent_token
from app.core.config import settings
from app.core.time import utcnow
from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig
from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, openclaw_call
from app.services.gateway_agents import (
gateway_agent_session_key,
gateway_openclaw_agent_id,
from app.integrations.openclaw_gateway import (
OpenClawGatewayError,
ensure_session,
openclaw_call,
send_message,
)
from app.models.agents import Agent
from app.models.board_memory import BoardMemory
from app.models.boards import Board
from app.models.gateways import Gateway
from app.schemas.gateways import GatewayTemplatesSyncError, GatewayTemplatesSyncResult
from app.services.openclaw.constants import (
_COORDINATION_GATEWAY_BASE_DELAY_S,
_COORDINATION_GATEWAY_MAX_DELAY_S,
_COORDINATION_GATEWAY_TIMEOUT_S,
_NON_TRANSIENT_GATEWAY_ERROR_MARKERS,
_SECURE_RANDOM,
_SESSION_KEY_PARTS_MIN,
_TOOLS_KV_RE,
_TRANSIENT_GATEWAY_ERROR_MARKERS,
DEFAULT_CHANNEL_HEARTBEAT_VISIBILITY,
DEFAULT_GATEWAY_FILES,
DEFAULT_HEARTBEAT_CONFIG,
DEFAULT_IDENTITY_PROFILE,
EXTRA_IDENTITY_PROFILE_FIELDS,
HEARTBEAT_AGENT_TEMPLATE,
HEARTBEAT_LEAD_TEMPLATE,
IDENTITY_PROFILE_FIELDS,
MAIN_TEMPLATE_MAP,
PRESERVE_AGENT_EDITABLE_FILES,
)
from app.services.openclaw.shared import GatewayAgentIdentity
if TYPE_CHECKING:
from app.models.agents import Agent
from app.models.boards import Board
from app.models.gateways import Gateway
from sqlmodel.ext.asyncio.session import AsyncSession
from app.models.users import User
DEFAULT_HEARTBEAT_CONFIG: dict[str, Any] = {
"every": "10m",
"target": "none",
# Keep heartbeat delivery concise by default.
"includeReasoning": False,
}
DEFAULT_CHANNEL_HEARTBEAT_VISIBILITY: dict[str, bool] = {
# Suppress routine HEARTBEAT_OK delivery by default.
"showOk": False,
"showAlerts": True,
"useIndicator": True,
}
DEFAULT_IDENTITY_PROFILE = {
"role": "Generalist",
"communication_style": "direct, concise, practical",
"emoji": ":gear:",
}
IDENTITY_PROFILE_FIELDS = {
"role": "identity_role",
"communication_style": "identity_communication_style",
"emoji": "identity_emoji",
}
EXTRA_IDENTITY_PROFILE_FIELDS = {
"autonomy_level": "identity_autonomy_level",
"verbosity": "identity_verbosity",
"output_format": "identity_output_format",
"update_cadence": "identity_update_cadence",
# Per-agent charter (optional).
# Used to give agents a "purpose in life" and a distinct vibe.
"purpose": "identity_purpose",
"personality": "identity_personality",
"custom_instructions": "identity_custom_instructions",
}
DEFAULT_GATEWAY_FILES = frozenset(
{
"AGENTS.md",
"SOUL.md",
"TASK_SOUL.md",
"SELF.md",
"AUTONOMY.md",
"TOOLS.md",
"IDENTITY.md",
"USER.md",
"HEARTBEAT.md",
"BOOT.md",
"BOOTSTRAP.md",
"MEMORY.md",
},
)
# These files are intended to evolve within the agent workspace.
# Provision them if missing, but avoid overwriting existing content during updates.
#
# Examples:
# - SELF.md: evolving identity/preferences
# - USER.md: human-provided context + lead intake notes
# - MEMORY.md: curated long-term memory (consolidated)
PRESERVE_AGENT_EDITABLE_FILES = frozenset({"SELF.md", "USER.md", "MEMORY.md", "TASK_SOUL.md"})
HEARTBEAT_LEAD_TEMPLATE = "HEARTBEAT_LEAD.md"
HEARTBEAT_AGENT_TEMPLATE = "HEARTBEAT_AGENT.md"
_SESSION_KEY_PARTS_MIN = 2
MAIN_TEMPLATE_MAP = {
"AGENTS.md": "MAIN_AGENTS.md",
"HEARTBEAT.md": "MAIN_HEARTBEAT.md",
"USER.md": "MAIN_USER.md",
"BOOT.md": "MAIN_BOOT.md",
"TOOLS.md": "MAIN_TOOLS.md",
}
@dataclass(frozen=True, slots=True)
class ProvisionOptions:
@@ -305,7 +264,7 @@ def _build_context(
workspace_path = _workspace_path(agent, workspace_root)
session_key = agent.openclaw_session_id or ""
base_url = settings.base_url or "REPLACE_WITH_BASE_URL"
main_session_key = gateway_agent_session_key(gateway)
main_session_key = GatewayAgentIdentity.session_key(gateway)
identity_profile: dict[str, Any] = {}
if isinstance(agent.identity_profile, dict):
identity_profile = agent.identity_profile
@@ -401,7 +360,7 @@ def _build_main_context(
"session_key": agent.openclaw_session_id or "",
"base_url": base_url,
"auth_token": auth_token,
"main_session_key": gateway_agent_session_key(gateway),
"main_session_key": GatewayAgentIdentity.session_key(gateway),
"workspace_root": gateway.workspace_root or "",
"user_name": (user.name or "") if user else "",
"user_preferred_name": preferred_name,
@@ -876,7 +835,7 @@ class GatewayMainAgentLifecycleManager(BaseAgentLifecycleManager):
"""Provisioning manager for organization gateway-main agents."""
def _agent_id(self, agent: Agent) -> str:
return gateway_openclaw_agent_id(self._gateway)
return GatewayAgentIdentity.openclaw_agent_id(self._gateway)
def _build_context(
self,
@@ -974,7 +933,7 @@ async def provision_main_agent(
gateway = request.gateway
if not gateway.url:
return
session_key = (request.session_key or gateway_agent_session_key(gateway) or "").strip()
session_key = (request.session_key or GatewayAgentIdentity.session_key(gateway) or "").strip()
if not session_key:
msg = "gateway main agent session_key is required"
raise ValueError(msg)
@@ -1008,3 +967,683 @@ async def cleanup_agent(
with suppress(OpenClawGatewayError):
await control_plane.delete_agent_session(session_key)
return None
_T = TypeVar("_T")
@dataclass(frozen=True)
class GatewayTemplateSyncOptions:
"""Runtime options controlling gateway template synchronization."""
user: User | None
include_main: bool = True
reset_sessions: bool = False
rotate_tokens: bool = False
force_bootstrap: bool = False
board_id: UUID | None = None
@dataclass(frozen=True)
class _SyncContext:
"""Shared state passed to sync helper functions."""
session: AsyncSession
gateway: Gateway
config: GatewayClientConfig
backoff: _GatewayBackoff
options: GatewayTemplateSyncOptions
def _is_transient_gateway_error(exc: Exception) -> bool:
if not isinstance(exc, OpenClawGatewayError):
return False
message = str(exc).lower()
if not message:
return False
if any(marker in message for marker in _NON_TRANSIENT_GATEWAY_ERROR_MARKERS):
return False
return ("503" in message and "websocket" in message) or any(
marker in message for marker in _TRANSIENT_GATEWAY_ERROR_MARKERS
)
def _gateway_timeout_message(
exc: OpenClawGatewayError,
*,
timeout_s: float,
context: str,
) -> str:
rounded_timeout = int(timeout_s)
timeout_text = f"{rounded_timeout} seconds"
if rounded_timeout >= 120:
timeout_text = f"{rounded_timeout // 60} minutes"
return f"Gateway unreachable after {timeout_text} ({context} timeout). Last error: {exc}"
class _GatewayBackoff:
def __init__(
self,
*,
timeout_s: float = 10 * 60,
base_delay_s: float = 0.75,
max_delay_s: float = 30.0,
jitter: float = 0.2,
timeout_context: str = "gateway operation",
) -> None:
self._timeout_s = timeout_s
self._base_delay_s = base_delay_s
self._max_delay_s = max_delay_s
self._jitter = jitter
self._timeout_context = timeout_context
self._delay_s = base_delay_s
def reset(self) -> None:
self._delay_s = self._base_delay_s
@staticmethod
async def _attempt(
fn: Callable[[], Awaitable[_T]],
) -> tuple[_T | None, OpenClawGatewayError | None]:
try:
return await fn(), None
except OpenClawGatewayError as exc:
return None, exc
async def run(self, fn: Callable[[], Awaitable[_T]]) -> _T:
# Use per-call deadlines so long-running syncs can still tolerate a later
# gateway restart without having an already-expired retry window.
deadline_s = asyncio.get_running_loop().time() + self._timeout_s
while True:
value, error = await self._attempt(fn)
if error is not None:
exc = error
if not _is_transient_gateway_error(exc):
raise exc
now = asyncio.get_running_loop().time()
remaining = deadline_s - now
if remaining <= 0:
raise TimeoutError(
_gateway_timeout_message(
exc,
timeout_s=self._timeout_s,
context=self._timeout_context,
),
) from exc
sleep_s = min(self._delay_s, remaining)
if self._jitter:
sleep_s *= 1.0 + _SECURE_RANDOM.uniform(
-self._jitter,
self._jitter,
)
sleep_s = max(0.0, min(sleep_s, remaining))
await asyncio.sleep(sleep_s)
self._delay_s = min(self._delay_s * 2.0, self._max_delay_s)
continue
self.reset()
if value is None:
msg = "Gateway retry produced no value without an error"
raise RuntimeError(msg)
return value
async def _with_gateway_retry(
fn: Callable[[], Awaitable[_T]],
*,
backoff: _GatewayBackoff,
) -> _T:
return await backoff.run(fn)
async def _with_coordination_gateway_retry(fn: Callable[[], Awaitable[_T]]) -> _T:
return await _with_gateway_retry(
fn,
backoff=_GatewayBackoff(
timeout_s=_COORDINATION_GATEWAY_TIMEOUT_S,
base_delay_s=_COORDINATION_GATEWAY_BASE_DELAY_S,
max_delay_s=_COORDINATION_GATEWAY_MAX_DELAY_S,
jitter=0.15,
timeout_context="gateway coordination",
),
)
def _parse_tools_md(content: str) -> dict[str, str]:
values: dict[str, str] = {}
for raw in content.splitlines():
line = raw.strip()
if not line or line.startswith("#"):
continue
match = _TOOLS_KV_RE.match(line)
if not match:
continue
values[match.group("key")] = match.group("value").strip()
return values
async def _get_agent_file(
*,
agent_gateway_id: str,
name: str,
config: GatewayClientConfig,
backoff: _GatewayBackoff | None = None,
) -> str | None:
try:
async def _do_get() -> object:
return await openclaw_call(
"agents.files.get",
{"agentId": agent_gateway_id, "name": name},
config=config,
)
payload = await (backoff.run(_do_get) if backoff else _do_get())
except OpenClawGatewayError:
return None
if isinstance(payload, str):
return payload
if isinstance(payload, dict):
content = payload.get("content")
if isinstance(content, str):
return content
file_obj = payload.get("file")
if isinstance(file_obj, dict):
nested = file_obj.get("content")
if isinstance(nested, str):
return nested
return None
async def _get_existing_auth_token(
*,
agent_gateway_id: str,
config: GatewayClientConfig,
backoff: _GatewayBackoff | None = None,
) -> str | None:
tools = await _get_agent_file(
agent_gateway_id=agent_gateway_id,
name="TOOLS.md",
config=config,
backoff=backoff,
)
if not tools:
return None
values = _parse_tools_md(tools)
token = values.get("AUTH_TOKEN")
if not token:
return None
token = token.strip()
return token or None
async def _paused_board_ids(session: AsyncSession, board_ids: list[UUID]) -> set[UUID]:
if not board_ids:
return set()
commands = {"/pause", "/resume"}
statement = (
select(BoardMemory.board_id, BoardMemory.content)
.where(col(BoardMemory.board_id).in_(board_ids))
.where(col(BoardMemory.is_chat).is_(True))
.where(func.lower(func.trim(col(BoardMemory.content))).in_(commands))
.order_by(col(BoardMemory.board_id), col(BoardMemory.created_at).desc())
# Postgres: DISTINCT ON (board_id) to get latest command per board.
.distinct(col(BoardMemory.board_id))
)
paused: set[UUID] = set()
for board_id, content in await session.exec(statement):
cmd = (content or "").strip().lower()
if cmd == "/pause":
paused.add(board_id)
return paused
def _append_sync_error(
result: GatewayTemplatesSyncResult,
*,
message: str,
agent: Agent | None = None,
board: Board | None = None,
) -> None:
result.errors.append(
GatewayTemplatesSyncError(
agent_id=agent.id if agent else None,
agent_name=agent.name if agent else None,
board_id=board.id if board else None,
message=message,
),
)
async def _rotate_agent_token(session: AsyncSession, agent: Agent) -> str:
token = generate_agent_token()
agent.agent_token_hash = hash_agent_token(token)
agent.updated_at = utcnow()
session.add(agent)
await session.commit()
await session.refresh(agent)
return token
async def _ping_gateway(ctx: _SyncContext, result: GatewayTemplatesSyncResult) -> bool:
try:
async def _do_ping() -> object:
return await openclaw_call("agents.list", config=ctx.config)
await ctx.backoff.run(_do_ping)
except (TimeoutError, OpenClawGatewayError) as exc:
_append_sync_error(result, message=str(exc))
return False
else:
return True
def _base_result(
gateway: Gateway,
*,
include_main: bool,
reset_sessions: bool,
) -> GatewayTemplatesSyncResult:
return GatewayTemplatesSyncResult(
gateway_id=gateway.id,
include_main=include_main,
reset_sessions=reset_sessions,
agents_updated=0,
agents_skipped=0,
main_updated=False,
)
def _boards_by_id(
boards: list[Board],
*,
board_id: UUID | None,
) -> dict[UUID, Board] | None:
boards_by_id = {board.id: board for board in boards}
if board_id is None:
return boards_by_id
board = boards_by_id.get(board_id)
if board is None:
return None
return {board_id: board}
async def _resolve_agent_auth_token(
ctx: _SyncContext,
result: GatewayTemplatesSyncResult,
agent: Agent,
board: Board | None,
*,
agent_gateway_id: str,
) -> tuple[str | None, bool]:
try:
auth_token = await _get_existing_auth_token(
agent_gateway_id=agent_gateway_id,
config=ctx.config,
backoff=ctx.backoff,
)
except TimeoutError as exc:
_append_sync_error(result, agent=agent, board=board, message=str(exc))
return None, True
if not auth_token:
if not ctx.options.rotate_tokens:
result.agents_skipped += 1
_append_sync_error(
result,
agent=agent,
board=board,
message=(
"Skipping agent: unable to read AUTH_TOKEN from TOOLS.md "
"(run with rotate_tokens=true to re-key)."
),
)
return None, False
auth_token = await _rotate_agent_token(ctx.session, agent)
if agent.agent_token_hash and not verify_agent_token(
auth_token,
agent.agent_token_hash,
):
if ctx.options.rotate_tokens:
auth_token = await _rotate_agent_token(ctx.session, agent)
else:
_append_sync_error(
result,
agent=agent,
board=board,
message=(
"Warning: AUTH_TOKEN in TOOLS.md does not match backend "
"token hash (agent auth may be broken)."
),
)
return auth_token, False
async def _sync_one_agent(
ctx: _SyncContext,
result: GatewayTemplatesSyncResult,
agent: Agent,
board: Board,
) -> bool:
auth_token, fatal = await _resolve_agent_auth_token(
ctx,
result,
agent,
board,
agent_gateway_id=_agent_key(agent),
)
if fatal:
return True
if not auth_token:
return False
try:
async def _do_provision() -> bool:
await provision_agent(
agent,
AgentProvisionRequest(
board=board,
gateway=ctx.gateway,
auth_token=auth_token,
user=ctx.options.user,
options=ProvisionOptions(
action="update",
force_bootstrap=ctx.options.force_bootstrap,
reset_session=ctx.options.reset_sessions,
),
),
)
return True
await _with_gateway_retry(_do_provision, backoff=ctx.backoff)
result.agents_updated += 1
except TimeoutError as exc: # pragma: no cover - gateway/network dependent
result.agents_skipped += 1
_append_sync_error(result, agent=agent, board=board, message=str(exc))
return True
except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover
result.agents_skipped += 1
_append_sync_error(
result,
agent=agent,
board=board,
message=f"Failed to sync templates: {exc}",
)
return False
else:
return False
async def _sync_main_agent(
ctx: _SyncContext,
result: GatewayTemplatesSyncResult,
) -> bool:
main_session_key = GatewayAgentIdentity.session_key(ctx.gateway)
main_agent = (
await Agent.objects.all()
.filter(col(Agent.gateway_id) == ctx.gateway.id)
.filter(col(Agent.board_id).is_(None))
.first(ctx.session)
)
if main_agent is None:
_append_sync_error(
result,
message="Gateway agent record not found; " "skipping gateway agent template sync.",
)
return True
main_gateway_agent_id = GatewayAgentIdentity.openclaw_agent_id(ctx.gateway)
token, fatal = await _resolve_agent_auth_token(
ctx,
result,
main_agent,
board=None,
agent_gateway_id=main_gateway_agent_id,
)
if fatal:
return True
if not token:
_append_sync_error(
result,
agent=main_agent,
message="Skipping gateway agent: unable to read AUTH_TOKEN from TOOLS.md.",
)
return True
stop_sync = False
try:
async def _do_provision_main() -> bool:
await provision_main_agent(
main_agent,
MainAgentProvisionRequest(
gateway=ctx.gateway,
auth_token=token,
user=ctx.options.user,
session_key=main_session_key,
options=ProvisionOptions(
action="update",
force_bootstrap=ctx.options.force_bootstrap,
reset_session=ctx.options.reset_sessions,
),
),
)
return True
await _with_gateway_retry(_do_provision_main, backoff=ctx.backoff)
except TimeoutError as exc: # pragma: no cover - gateway/network dependent
_append_sync_error(result, agent=main_agent, message=str(exc))
stop_sync = True
except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover
_append_sync_error(
result,
agent=main_agent,
message=f"Failed to sync gateway agent templates: {exc}",
)
else:
result.main_updated = True
return stop_sync
async def sync_gateway_templates(
session: AsyncSession,
gateway: Gateway,
options: GatewayTemplateSyncOptions,
) -> GatewayTemplatesSyncResult:
"""Synchronize AGENTS/TOOLS/etc templates to gateway-connected agents."""
result = _base_result(
gateway,
include_main=options.include_main,
reset_sessions=options.reset_sessions,
)
if not gateway.url:
_append_sync_error(
result,
message="Gateway URL is not configured for this gateway.",
)
return result
ctx = _SyncContext(
session=session,
gateway=gateway,
config=GatewayClientConfig(url=gateway.url, token=gateway.token),
backoff=_GatewayBackoff(timeout_s=10 * 60, timeout_context="template sync"),
options=options,
)
if not await _ping_gateway(ctx, result):
return result
boards = await Board.objects.filter_by(gateway_id=gateway.id).all(session)
boards_by_id = _boards_by_id(boards, board_id=options.board_id)
if boards_by_id is None:
_append_sync_error(
result,
message="Board does not belong to this gateway.",
)
return result
paused_board_ids = await _paused_board_ids(session, list(boards_by_id.keys()))
if boards_by_id:
agents = await (
Agent.objects.by_field_in("board_id", list(boards_by_id.keys()))
.order_by(col(Agent.created_at).asc())
.all(session)
)
else:
agents = []
stop_sync = False
for agent in agents:
board = boards_by_id.get(agent.board_id) if agent.board_id is not None else None
if board is None:
result.agents_skipped += 1
_append_sync_error(
result,
agent=agent,
message="Skipping agent: board not found for agent.",
)
continue
if board.id in paused_board_ids:
result.agents_skipped += 1
continue
stop_sync = await _sync_one_agent(ctx, result, agent, board)
if stop_sync:
break
if not stop_sync and options.include_main:
await _sync_main_agent(ctx, result)
return result
# Board lead lifecycle primitives consolidated from app.services.board_leads.
def lead_session_key(board: Board) -> str:
"""Return the deterministic main session key for a board lead agent."""
return f"agent:lead-{board.id}:main"
def lead_agent_name(_: Board) -> str:
"""Return the default display name for board lead agents."""
return "Lead Agent"
@dataclass(frozen=True, slots=True)
class LeadAgentOptions:
"""Optional overrides for board-lead provisioning behavior."""
agent_name: str | None = None
identity_profile: dict[str, str] | None = None
action: str = "provision"
@dataclass(frozen=True, slots=True)
class LeadAgentRequest:
"""Inputs required to ensure or provision a board lead agent."""
board: Board
gateway: Gateway
config: GatewayClientConfig
user: User | None
options: LeadAgentOptions = field(default_factory=LeadAgentOptions)
async def ensure_board_lead_agent(
session: AsyncSession,
*,
request: LeadAgentRequest,
) -> tuple[Agent, bool]:
"""Ensure a board has a lead agent; return `(agent, created)`."""
board = request.board
config_options = request.options
existing = (
await session.exec(
select(Agent)
.where(Agent.board_id == board.id)
.where(col(Agent.is_board_lead).is_(True)),
)
).first()
if existing:
desired_name = config_options.agent_name or lead_agent_name(board)
changed = False
if existing.name != desired_name:
existing.name = desired_name
changed = True
if existing.gateway_id != request.gateway.id:
existing.gateway_id = request.gateway.id
changed = True
desired_session_key = lead_session_key(board)
if not existing.openclaw_session_id:
existing.openclaw_session_id = desired_session_key
changed = True
if changed:
existing.updated_at = utcnow()
session.add(existing)
await session.commit()
await session.refresh(existing)
return existing, False
merged_identity_profile: dict[str, Any] = {
"role": "Board Lead",
"communication_style": "direct, concise, practical",
"emoji": ":gear:",
}
if config_options.identity_profile:
merged_identity_profile.update(
{
key: value.strip()
for key, value in config_options.identity_profile.items()
if value.strip()
},
)
agent = Agent(
name=config_options.agent_name or lead_agent_name(board),
status="provisioning",
board_id=board.id,
gateway_id=request.gateway.id,
is_board_lead=True,
heartbeat_config=DEFAULT_HEARTBEAT_CONFIG.copy(),
identity_profile=merged_identity_profile,
openclaw_session_id=lead_session_key(board),
provision_requested_at=utcnow(),
provision_action=config_options.action,
)
raw_token = generate_agent_token()
agent.agent_token_hash = hash_agent_token(raw_token)
session.add(agent)
await session.commit()
await session.refresh(agent)
try:
await provision_agent(
agent,
AgentProvisionRequest(
board=board,
gateway=request.gateway,
auth_token=raw_token,
user=request.user,
options=ProvisionOptions(action=config_options.action),
),
)
if agent.openclaw_session_id:
await ensure_session(
agent.openclaw_session_id,
config=request.config,
label=agent.name,
)
await send_message(
(
f"Hello {agent.name}. Your workspace has been provisioned.\n\n"
"Start the agent, run BOOT.md, and if BOOTSTRAP.md exists run "
"it once then delete it. Begin heartbeats after startup."
),
session_key=agent.openclaw_session_id,
config=request.config,
deliver=True,
)
except OpenClawGatewayError:
# Best-effort provisioning. The board/agent rows should still exist.
pass
return agent, True

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,98 @@
"""Shared OpenClaw lifecycle primitives."""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from uuid import UUID, uuid4
from fastapi import HTTPException, status
from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig
from app.integrations.openclaw_gateway import ensure_session, send_message
from app.models.boards import Board
from app.models.gateways import Gateway
from app.services.openclaw.constants import (
_GATEWAY_AGENT_PREFIX,
_GATEWAY_AGENT_SUFFIX,
_GATEWAY_OPENCLAW_AGENT_PREFIX,
)
if TYPE_CHECKING:
from sqlmodel.ext.asyncio.session import AsyncSession
class GatewayAgentIdentity:
"""Naming and identity rules for Mission Control gateway-main agents."""
@classmethod
def session_key_for_id(cls, gateway_id: UUID) -> str:
return f"{_GATEWAY_AGENT_PREFIX}{gateway_id}{_GATEWAY_AGENT_SUFFIX}"
@classmethod
def session_key(cls, gateway: Gateway) -> str:
return cls.session_key_for_id(gateway.id)
@classmethod
def openclaw_agent_id_for_id(cls, gateway_id: UUID) -> str:
return f"{_GATEWAY_OPENCLAW_AGENT_PREFIX}{gateway_id}"
@classmethod
def openclaw_agent_id(cls, gateway: Gateway) -> str:
return cls.openclaw_agent_id_for_id(gateway.id)
async def optional_gateway_config_for_board(
session: AsyncSession,
board: Board,
) -> GatewayClientConfig | None:
"""Return gateway client config when board has a reachable configured gateway."""
if board.gateway_id is None:
return None
gateway = await Gateway.objects.by_id(board.gateway_id).first(session)
if gateway is None or not gateway.url:
return None
return GatewayClientConfig(url=gateway.url, token=gateway.token)
async def require_gateway_config_for_board(
session: AsyncSession,
board: Board,
) -> tuple[Gateway, GatewayClientConfig]:
"""Resolve board gateway and config, raising 422 when unavailable."""
if board.gateway_id is None:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail="Board is not attached to a gateway",
)
gateway = await Gateway.objects.by_id(board.gateway_id).first(session)
if gateway is None or not gateway.url:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail="Gateway is not configured for this board",
)
return gateway, GatewayClientConfig(url=gateway.url, token=gateway.token)
async def send_gateway_agent_message(
*,
session_key: str,
config: GatewayClientConfig,
agent_name: str,
message: str,
deliver: bool = False,
) -> None:
"""Ensure session and dispatch a message to an agent session."""
await ensure_session(session_key, config=config, label=agent_name)
await send_message(message, session_key=session_key, config=config, deliver=deliver)
def resolve_trace_id(correlation_id: str | None, *, prefix: str) -> str:
"""Resolve a stable trace id from correlation id or generate a scoped fallback."""
normalized = (correlation_id or "").strip()
if normalized:
return normalized
return f"{prefix}:{uuid4().hex[:12]}"
logger = logging.getLogger(__name__)

View File

@@ -1,593 +0,0 @@
"""Gateway template synchronization orchestration."""
from __future__ import annotations
import asyncio
import random
import re
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import TypeVar
from uuid import UUID, uuid4
from sqlalchemy import func
from sqlmodel import col, select
from sqlmodel.ext.asyncio.session import AsyncSession
from app.core.agent_tokens import generate_agent_token, hash_agent_token, verify_agent_token
from app.core.time import utcnow
from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig
from app.integrations.openclaw_gateway import OpenClawGatewayError, openclaw_call
from app.models.agents import Agent
from app.models.board_memory import BoardMemory
from app.models.boards import Board
from app.models.gateways import Gateway
from app.models.users import User
from app.schemas.gateways import GatewayTemplatesSyncError, GatewayTemplatesSyncResult
from app.services.agent_provisioning import (
AgentProvisionRequest,
MainAgentProvisionRequest,
ProvisionOptions,
provision_agent,
provision_main_agent,
)
from app.services.gateway_agents import (
gateway_agent_session_key,
gateway_openclaw_agent_id,
)
_TOOLS_KV_RE = re.compile(r"^(?P<key>[A-Z0-9_]+)=(?P<value>.*)$")
SESSION_KEY_PARTS_MIN = 2
_NON_TRANSIENT_GATEWAY_ERROR_MARKERS = ("unsupported file",)
_TRANSIENT_GATEWAY_ERROR_MARKERS = (
"connect call failed",
"connection refused",
"errno 111",
"econnrefused",
"did not receive a valid http response",
"no route to host",
"network is unreachable",
"host is down",
"name or service not known",
"received 1012",
"service restart",
"http 503",
"http 502",
"http 504",
"temporar",
"timeout",
"timed out",
"connection closed",
"connection reset",
)
T = TypeVar("T")
_SECURE_RANDOM = random.SystemRandom()
_RUNTIME_TYPE_REFERENCES = (Awaitable, Callable, AsyncSession, Gateway, User, UUID)
@dataclass(frozen=True)
class GatewayTemplateSyncOptions:
"""Runtime options controlling gateway template synchronization."""
user: User | None
include_main: bool = True
reset_sessions: bool = False
rotate_tokens: bool = False
force_bootstrap: bool = False
board_id: UUID | None = None
@dataclass(frozen=True)
class _SyncContext:
"""Shared state passed to sync helper functions."""
session: AsyncSession
gateway: Gateway
config: GatewayClientConfig
backoff: _GatewayBackoff
options: GatewayTemplateSyncOptions
def _slugify(value: str) -> str:
slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
return slug or uuid4().hex
def _is_transient_gateway_error(exc: Exception) -> bool:
if not isinstance(exc, OpenClawGatewayError):
return False
message = str(exc).lower()
if not message:
return False
if any(marker in message for marker in _NON_TRANSIENT_GATEWAY_ERROR_MARKERS):
return False
return ("503" in message and "websocket" in message) or any(
marker in message for marker in _TRANSIENT_GATEWAY_ERROR_MARKERS
)
def _gateway_timeout_message(exc: OpenClawGatewayError) -> str:
return "Gateway unreachable after 10 minutes (template sync timeout). " f"Last error: {exc}"
class _GatewayBackoff:
def __init__(
self,
*,
timeout_s: float = 10 * 60,
base_delay_s: float = 0.75,
max_delay_s: float = 30.0,
jitter: float = 0.2,
) -> None:
self._timeout_s = timeout_s
self._base_delay_s = base_delay_s
self._max_delay_s = max_delay_s
self._jitter = jitter
self._delay_s = base_delay_s
def reset(self) -> None:
self._delay_s = self._base_delay_s
@staticmethod
async def _attempt(
fn: Callable[[], Awaitable[T]],
) -> tuple[T | None, OpenClawGatewayError | None]:
try:
return await fn(), None
except OpenClawGatewayError as exc:
return None, exc
async def run(self, fn: Callable[[], Awaitable[T]]) -> T:
# Use per-call deadlines so long-running syncs can still tolerate a later
# gateway restart without having an already-expired retry window.
deadline_s = asyncio.get_running_loop().time() + self._timeout_s
while True:
value, error = await self._attempt(fn)
if error is not None:
exc = error
if not _is_transient_gateway_error(exc):
raise exc
now = asyncio.get_running_loop().time()
remaining = deadline_s - now
if remaining <= 0:
raise TimeoutError(_gateway_timeout_message(exc)) from exc
sleep_s = min(self._delay_s, remaining)
if self._jitter:
sleep_s *= 1.0 + _SECURE_RANDOM.uniform(
-self._jitter,
self._jitter,
)
sleep_s = max(0.0, min(sleep_s, remaining))
await asyncio.sleep(sleep_s)
self._delay_s = min(self._delay_s * 2.0, self._max_delay_s)
continue
self.reset()
if value is None:
msg = "Gateway retry produced no value without an error"
raise RuntimeError(msg)
return value
async def _with_gateway_retry(
fn: Callable[[], Awaitable[T]],
*,
backoff: _GatewayBackoff,
) -> T:
return await backoff.run(fn)
def _gateway_agent_id(agent: Agent) -> str:
session_key = agent.openclaw_session_id or ""
if session_key.startswith("agent:"):
parts = session_key.split(":")
if len(parts) >= SESSION_KEY_PARTS_MIN and parts[1]:
return parts[1]
return _slugify(agent.name)
def _parse_tools_md(content: str) -> dict[str, str]:
values: dict[str, str] = {}
for raw in content.splitlines():
line = raw.strip()
if not line or line.startswith("#"):
continue
match = _TOOLS_KV_RE.match(line)
if not match:
continue
values[match.group("key")] = match.group("value").strip()
return values
async def _get_agent_file(
*,
agent_gateway_id: str,
name: str,
config: GatewayClientConfig,
backoff: _GatewayBackoff | None = None,
) -> str | None:
try:
async def _do_get() -> object:
return await openclaw_call(
"agents.files.get",
{"agentId": agent_gateway_id, "name": name},
config=config,
)
payload = await (backoff.run(_do_get) if backoff else _do_get())
except OpenClawGatewayError:
return None
if isinstance(payload, str):
return payload
if isinstance(payload, dict):
content = payload.get("content")
if isinstance(content, str):
return content
file_obj = payload.get("file")
if isinstance(file_obj, dict):
nested = file_obj.get("content")
if isinstance(nested, str):
return nested
return None
async def _get_existing_auth_token(
*,
agent_gateway_id: str,
config: GatewayClientConfig,
backoff: _GatewayBackoff | None = None,
) -> str | None:
tools = await _get_agent_file(
agent_gateway_id=agent_gateway_id,
name="TOOLS.md",
config=config,
backoff=backoff,
)
if not tools:
return None
values = _parse_tools_md(tools)
token = values.get("AUTH_TOKEN")
if not token:
return None
token = token.strip()
return token or None
async def _paused_board_ids(session: AsyncSession, board_ids: list[UUID]) -> set[UUID]:
if not board_ids:
return set()
commands = {"/pause", "/resume"}
statement = (
select(BoardMemory.board_id, BoardMemory.content)
.where(col(BoardMemory.board_id).in_(board_ids))
.where(col(BoardMemory.is_chat).is_(True))
.where(func.lower(func.trim(col(BoardMemory.content))).in_(commands))
.order_by(col(BoardMemory.board_id), col(BoardMemory.created_at).desc())
# Postgres: DISTINCT ON (board_id) to get latest command per board.
.distinct(col(BoardMemory.board_id))
)
paused: set[UUID] = set()
for board_id, content in await session.exec(statement):
cmd = (content or "").strip().lower()
if cmd == "/pause":
paused.add(board_id)
return paused
def _append_sync_error(
result: GatewayTemplatesSyncResult,
*,
message: str,
agent: Agent | None = None,
board: Board | None = None,
) -> None:
result.errors.append(
GatewayTemplatesSyncError(
agent_id=agent.id if agent else None,
agent_name=agent.name if agent else None,
board_id=board.id if board else None,
message=message,
),
)
async def _rotate_agent_token(session: AsyncSession, agent: Agent) -> str:
token = generate_agent_token()
agent.agent_token_hash = hash_agent_token(token)
agent.updated_at = utcnow()
session.add(agent)
await session.commit()
await session.refresh(agent)
return token
async def _ping_gateway(ctx: _SyncContext, result: GatewayTemplatesSyncResult) -> bool:
try:
async def _do_ping() -> object:
return await openclaw_call("agents.list", config=ctx.config)
await ctx.backoff.run(_do_ping)
except (TimeoutError, OpenClawGatewayError) as exc:
_append_sync_error(result, message=str(exc))
return False
else:
return True
def _base_result(
gateway: Gateway,
*,
include_main: bool,
reset_sessions: bool,
) -> GatewayTemplatesSyncResult:
return GatewayTemplatesSyncResult(
gateway_id=gateway.id,
include_main=include_main,
reset_sessions=reset_sessions,
agents_updated=0,
agents_skipped=0,
main_updated=False,
)
def _boards_by_id(
boards: list[Board],
*,
board_id: UUID | None,
) -> dict[UUID, Board] | None:
boards_by_id = {board.id: board for board in boards}
if board_id is None:
return boards_by_id
board = boards_by_id.get(board_id)
if board is None:
return None
return {board_id: board}
async def _resolve_agent_auth_token(
ctx: _SyncContext,
result: GatewayTemplatesSyncResult,
agent: Agent,
board: Board | None,
*,
agent_gateway_id: str,
) -> tuple[str | None, bool]:
try:
auth_token = await _get_existing_auth_token(
agent_gateway_id=agent_gateway_id,
config=ctx.config,
backoff=ctx.backoff,
)
except TimeoutError as exc:
_append_sync_error(result, agent=agent, board=board, message=str(exc))
return None, True
if not auth_token:
if not ctx.options.rotate_tokens:
result.agents_skipped += 1
_append_sync_error(
result,
agent=agent,
board=board,
message=(
"Skipping agent: unable to read AUTH_TOKEN from TOOLS.md "
"(run with rotate_tokens=true to re-key)."
),
)
return None, False
auth_token = await _rotate_agent_token(ctx.session, agent)
if agent.agent_token_hash and not verify_agent_token(
auth_token,
agent.agent_token_hash,
):
if ctx.options.rotate_tokens:
auth_token = await _rotate_agent_token(ctx.session, agent)
else:
_append_sync_error(
result,
agent=agent,
board=board,
message=(
"Warning: AUTH_TOKEN in TOOLS.md does not match backend "
"token hash (agent auth may be broken)."
),
)
return auth_token, False
async def _sync_one_agent(
ctx: _SyncContext,
result: GatewayTemplatesSyncResult,
agent: Agent,
board: Board,
) -> bool:
auth_token, fatal = await _resolve_agent_auth_token(
ctx,
result,
agent,
board,
agent_gateway_id=_gateway_agent_id(agent),
)
if fatal:
return True
if not auth_token:
return False
try:
async def _do_provision() -> None:
await provision_agent(
agent,
AgentProvisionRequest(
board=board,
gateway=ctx.gateway,
auth_token=auth_token,
user=ctx.options.user,
options=ProvisionOptions(
action="update",
force_bootstrap=ctx.options.force_bootstrap,
reset_session=ctx.options.reset_sessions,
),
),
)
await _with_gateway_retry(_do_provision, backoff=ctx.backoff)
result.agents_updated += 1
except TimeoutError as exc: # pragma: no cover - gateway/network dependent
result.agents_skipped += 1
_append_sync_error(result, agent=agent, board=board, message=str(exc))
return True
except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover
result.agents_skipped += 1
_append_sync_error(
result,
agent=agent,
board=board,
message=f"Failed to sync templates: {exc}",
)
return False
else:
return False
async def _sync_main_agent(
ctx: _SyncContext,
result: GatewayTemplatesSyncResult,
) -> bool:
main_session_key = gateway_agent_session_key(ctx.gateway)
main_agent = (
await Agent.objects.all()
.filter(col(Agent.gateway_id) == ctx.gateway.id)
.filter(col(Agent.board_id).is_(None))
.first(ctx.session)
)
if main_agent is None:
_append_sync_error(
result,
message=("Gateway agent record not found; " "skipping gateway agent template sync."),
)
return True
main_gateway_agent_id = gateway_openclaw_agent_id(ctx.gateway)
token, fatal = await _resolve_agent_auth_token(
ctx,
result,
main_agent,
board=None,
agent_gateway_id=main_gateway_agent_id,
)
if fatal:
return True
if not token:
_append_sync_error(
result,
agent=main_agent,
message="Skipping gateway agent: unable to read AUTH_TOKEN from TOOLS.md.",
)
return True
stop_sync = False
try:
async def _do_provision_main() -> None:
await provision_main_agent(
main_agent,
MainAgentProvisionRequest(
gateway=ctx.gateway,
auth_token=token,
user=ctx.options.user,
session_key=main_session_key,
options=ProvisionOptions(
action="update",
force_bootstrap=ctx.options.force_bootstrap,
reset_session=ctx.options.reset_sessions,
),
),
)
await _with_gateway_retry(_do_provision_main, backoff=ctx.backoff)
except TimeoutError as exc: # pragma: no cover - gateway/network dependent
_append_sync_error(result, agent=main_agent, message=str(exc))
stop_sync = True
except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover
_append_sync_error(
result,
agent=main_agent,
message=f"Failed to sync gateway agent templates: {exc}",
)
else:
result.main_updated = True
return stop_sync
async def sync_gateway_templates(
session: AsyncSession,
gateway: Gateway,
options: GatewayTemplateSyncOptions,
) -> GatewayTemplatesSyncResult:
"""Synchronize AGENTS/TOOLS/etc templates to gateway-connected agents."""
result = _base_result(
gateway,
include_main=options.include_main,
reset_sessions=options.reset_sessions,
)
if not gateway.url:
_append_sync_error(
result,
message="Gateway URL is not configured for this gateway.",
)
return result
ctx = _SyncContext(
session=session,
gateway=gateway,
config=GatewayClientConfig(url=gateway.url, token=gateway.token),
backoff=_GatewayBackoff(timeout_s=10 * 60),
options=options,
)
if not await _ping_gateway(ctx, result):
return result
boards = await Board.objects.filter_by(gateway_id=gateway.id).all(session)
boards_by_id = _boards_by_id(boards, board_id=options.board_id)
if boards_by_id is None:
_append_sync_error(
result,
message="Board does not belong to this gateway.",
)
return result
paused_board_ids = await _paused_board_ids(session, list(boards_by_id.keys()))
if boards_by_id:
agents = await (
Agent.objects.by_field_in("board_id", list(boards_by_id.keys()))
.order_by(col(Agent.created_at).asc())
.all(session)
)
else:
agents = []
stop_sync = False
for agent in agents:
board = boards_by_id.get(agent.board_id) if agent.board_id is not None else None
if board is None:
result.agents_skipped += 1
_append_sync_error(
result,
agent=agent,
message="Skipping agent: board not found for agent.",
)
continue
if board.id in paused_board_ids:
result.agents_skipped += 1
continue
stop_sync = await _sync_one_agent(ctx, result, agent, board)
if stop_sync:
break
if not stop_sync and options.include_main:
await _sync_main_agent(ctx, result)
return result