From 82425edd69ff768a9e82558dd970599b45bc1e8f Mon Sep 17 00:00:00 2001 From: Abhimanyu Saharan Date: Tue, 10 Feb 2026 14:50:27 +0530 Subject: [PATCH] refactor: reorganize OpenClaw services and enhance session management --- backend/app/api/agent.py | 481 +-- backend/app/api/agents.py | 1242 +------ backend/app/api/board_group_memory.py | 33 +- backend/app/api/board_groups.py | 2 +- backend/app/api/board_memory.py | 34 +- backend/app/api/board_onboarding.py | 72 +- backend/app/api/boards.py | 65 +- backend/app/api/gateway.py | 306 +- backend/app/api/gateways.py | 386 +-- backend/app/api/tasks.py | 42 +- backend/app/services/board_leads.py | 159 - backend/app/services/board_snapshot.py | 34 +- backend/app/services/gateway_agents.py | 31 - backend/app/services/openclaw/__init__.py | 7 + backend/app/services/openclaw/constants.py | 120 + backend/app/services/openclaw/exceptions.py | 90 + .../provisioning.py} | 815 ++++- backend/app/services/openclaw/services.py | 2949 +++++++++++++++++ backend/app/services/openclaw/shared.py | 98 + backend/app/services/template_sync.py | 593 ---- backend/scripts/seed_demo.py | 4 +- backend/scripts/sync_gateway_templates.py | 2 +- .../tests/test_agent_provisioning_utils.py | 11 +- backend/tests/test_lifecycle_services.py | 258 ++ 24 files changed, 4454 insertions(+), 3380 deletions(-) delete mode 100644 backend/app/services/board_leads.py delete mode 100644 backend/app/services/gateway_agents.py create mode 100644 backend/app/services/openclaw/__init__.py create mode 100644 backend/app/services/openclaw/constants.py create mode 100644 backend/app/services/openclaw/exceptions.py rename backend/app/services/{agent_provisioning.py => openclaw/provisioning.py} (57%) create mode 100644 backend/app/services/openclaw/services.py create mode 100644 backend/app/services/openclaw/shared.py delete mode 100644 backend/app/services/template_sync.py create mode 100644 backend/tests/test_lifecycle_services.py diff --git a/backend/app/api/agent.py 
b/backend/app/api/agent.py index e3c30af5..0b6cfc35 100644 --- a/backend/app/api/agent.py +++ b/backend/app/api/agent.py @@ -2,7 +2,6 @@ from __future__ import annotations -import re from typing import TYPE_CHECKING, Any from uuid import UUID @@ -16,20 +15,10 @@ from app.api import board_onboarding as onboarding_api from app.api import tasks as tasks_api from app.api.deps import ActorContext, get_board_or_404, get_task_or_404 from app.core.agent_auth import AgentAuthContext, get_agent_auth_context -from app.core.config import settings -from app.core.time import utcnow from app.db.pagination import paginate from app.db.session import get_session -from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import ( - OpenClawGatewayError, - ensure_session, - openclaw_call, - send_message, -) from app.models.agents import Agent from app.models.boards import Board -from app.models.gateways import Gateway from app.models.task_dependencies import TaskDependency from app.models.tasks import Task from app.schemas.agents import ( @@ -45,7 +34,6 @@ from app.schemas.board_onboarding import BoardOnboardingAgentUpdate, BoardOnboar from app.schemas.boards import BoardRead from app.schemas.common import OkResponse from app.schemas.gateway_coordination import ( - GatewayLeadBroadcastBoardResult, GatewayLeadBroadcastRequest, GatewayLeadBroadcastResponse, GatewayLeadMessageRequest, @@ -56,8 +44,7 @@ from app.schemas.gateway_coordination import ( from app.schemas.pagination import DefaultLimitOffsetPage from app.schemas.tasks import TaskCommentCreate, TaskCommentRead, TaskCreate, TaskRead, TaskUpdate from app.services.activity_log import record_activity -from app.services.board_leads import LeadAgentOptions, LeadAgentRequest, ensure_board_lead_agent -from app.services.gateway_agents import gateway_agent_session_key +from app.services.openclaw import AgentLifecycleService, GatewayCoordinationService from 
app.services.task_dependencies import ( blocked_by_dependency_ids, dependency_status_by_id, @@ -76,10 +63,6 @@ if TYPE_CHECKING: from app.models.board_onboarding import BoardOnboardingSession router = APIRouter(prefix="/agent", tags=["agent"]) - -_AGENT_SESSION_PREFIX = "agent:" -_SESSION_KEY_PARTS_MIN = 2 -_LEAD_SESSION_KEY_MISSING = "Lead agent has no session key" SESSION_DEP = Depends(get_session) AGENT_CTX_DEP = Depends(get_agent_auth_context) BOARD_DEP = Depends(get_board_or_404) @@ -100,18 +83,6 @@ def _coerce_agent_items(items: Sequence[Any]) -> list[Agent]: return agents -def _gateway_agent_id(agent: Agent) -> str: - session_key = agent.openclaw_session_id or "" - if session_key.startswith(_AGENT_SESSION_PREFIX): - parts = session_key.split(":") - if len(parts) >= _SESSION_KEY_PARTS_MIN and parts[1]: - return parts[1] - # Fall back to a stable slug derived from name (matches provisioning behavior). - value = agent.name.lower().strip() - value = re.sub(r"[^a-z0-9]+", "-", value).strip("-") - return value or str(agent.id) - - class SoulUpdateRequest(SQLModel): """Payload for updating an agent SOUL document.""" @@ -147,73 +118,11 @@ def _actor(agent_ctx: AgentAuthContext) -> ActorContext: return ActorContext(actor_type="agent", agent=agent_ctx.agent) -def _require_lead_session_key(lead: Agent) -> str: - session_key = lead.openclaw_session_id - if not session_key: - raise ValueError(_LEAD_SESSION_KEY_MISSING) - return session_key - - def _guard_board_access(agent_ctx: AgentAuthContext, board: Board) -> None: if agent_ctx.agent.board_id and agent_ctx.agent.board_id != board.id: raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) -async def _gateway_config(session: AsyncSession, board: Board) -> GatewayClientConfig: - if not board.gateway_id: - raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY) - gateway = await Gateway.objects.by_id(board.gateway_id).first(session) - if gateway is None or not gateway.url: - raise 
HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY) - return GatewayClientConfig(url=gateway.url, token=gateway.token) - - -async def _require_gateway_main( - session: AsyncSession, - agent: Agent, -) -> tuple[Gateway, GatewayClientConfig]: - if agent.board_id is not None: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Only the dedicated gateway agent may call this endpoint.", - ) - gateway_id = agent.gateway_id - gateway = await Gateway.objects.by_id(gateway_id).first(session) - if gateway is None: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Only the dedicated gateway agent may call this endpoint.", - ) - if agent.openclaw_session_id != gateway_agent_session_key(gateway): - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Only the dedicated gateway agent may call this endpoint.", - ) - if not gateway.url: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Gateway url is required", - ) - return gateway, GatewayClientConfig(url=gateway.url, token=gateway.token) - - -async def _require_gateway_board( - session: AsyncSession, - *, - gateway: Gateway, - board_id: UUID | str, -) -> Board: - board = await Board.objects.by_id(board_id).first(session) - if board is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Board not found", - ) - if board.gateway_id != gateway.id: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - return board - - @router.get("/boards", response_model=DefaultLimitOffsetPage[BoardRead]) async def list_boards( session: AsyncSession = SESSION_DEP, @@ -256,8 +165,8 @@ async def list_agents( def _transform(items: Sequence[Any]) -> Sequence[Any]: agents = _coerce_agent_items(items) return [ - agents_api.to_agent_read( - agents_api.with_computed_status(agent), + AgentLifecycleService.to_agent_read( + AgentLifecycleService.with_computed_status(agent), ) for agent in agents ] @@ -560,47 
+469,14 @@ async def nudge_agent( _guard_board_access(agent_ctx, board) if not agent_ctx.agent.is_board_lead: raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - target = await Agent.objects.by_id(agent_id).first(session) - if target is None or (target.board_id and target.board_id != board.id): - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - if not target.openclaw_session_id: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Target agent has no session key", - ) - message = payload.message - config = await _gateway_config(session, board) - try: - await ensure_session( - target.openclaw_session_id, - config=config, - label=target.name, - ) - await send_message( - message, - session_key=target.openclaw_session_id, - config=config, - deliver=True, - ) - except OpenClawGatewayError as exc: - record_activity( - session, - event_type="agent.nudge.failed", - message=f"Nudge failed for {target.name}: {exc}", - agent_id=agent_ctx.agent.id, - ) - await session.commit() - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc - record_activity( - session, - event_type="agent.nudge.sent", - message=f"Nudge sent to {target.name}.", - agent_id=agent_ctx.agent.id, + coordination = GatewayCoordinationService(session) + await coordination.nudge_board_agent( + board=board, + actor_agent=agent_ctx.agent, + target_agent_id=agent_id, + message=payload.message, + correlation_id=f"nudge:{board.id}:{agent_id}", ) - await session.commit() return OkResponse() @@ -631,36 +507,11 @@ async def get_agent_soul( _guard_board_access(agent_ctx, board) if not agent_ctx.agent.is_board_lead and str(agent_ctx.agent.id) != agent_id: raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - target = await Agent.objects.by_id(agent_id).first(session) - if target is None or (target.board_id and target.board_id != board.id): - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - config = await 
_gateway_config(session, board) - gateway_id = _gateway_agent_id(target) - try: - payload = await openclaw_call( - "agents.files.get", - {"agentId": gateway_id, "name": "SOUL.md"}, - config=config, - ) - except OpenClawGatewayError as exc: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc - if isinstance(payload, str): - return payload - if isinstance(payload, dict): - content = payload.get("content") - if isinstance(content, str): - return content - file_obj = payload.get("file") - if isinstance(file_obj, dict): - nested = file_obj.get("content") - if isinstance(nested, str): - return nested - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail="Invalid gateway response", + coordination = GatewayCoordinationService(session) + return await coordination.get_agent_soul( + board=board, + target_agent_id=agent_id, + correlation_id=f"soul.read:{board.id}:{agent_id}", ) @@ -676,48 +527,16 @@ async def update_agent_soul( _guard_board_access(agent_ctx, board) if not agent_ctx.agent.is_board_lead: raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - target = await Agent.objects.by_id(agent_id).first(session) - if target is None or (target.board_id and target.board_id != board.id): - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - config = await _gateway_config(session, board) - gateway_id = _gateway_agent_id(target) - content = payload.content.strip() - if not content: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="content is required", - ) - - # Persist the SOUL in the DB so future reprovision/update doesn't overwrite it. 
- target.soul_template = content - target.updated_at = utcnow() - session.add(target) - await session.commit() - try: - await openclaw_call( - "agents.files.set", - {"agentId": gateway_id, "name": "SOUL.md", "content": content}, - config=config, - ) - except OpenClawGatewayError as exc: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc - reason = (payload.reason or "").strip() - source_url = (payload.source_url or "").strip() - note = f"SOUL.md updated for {target.name}." - if reason: - note = f"{note} Reason: {reason}" - if source_url: - note = f"{note} Source: {source_url}" - record_activity( - session, - event_type="agent.soul.updated", - message=note, - agent_id=agent_ctx.agent.id, + coordination = GatewayCoordinationService(session) + await coordination.update_agent_soul( + board=board, + target_agent_id=agent_id, + content=payload.content, + reason=payload.reason, + source_url=payload.source_url, + actor_agent_id=agent_ctx.agent.id, + correlation_id=f"soul.write:{board.id}:{agent_id}", ) - await session.commit() return OkResponse() @@ -732,89 +551,14 @@ async def ask_user_via_gateway_main( agent_ctx: AgentAuthContext = AGENT_CTX_DEP, ) -> GatewayMainAskUserResponse: """Route a lead's ask-user request through the dedicated gateway agent.""" - import json - _guard_board_access(agent_ctx, board) if not agent_ctx.agent.is_board_lead: raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - - if not board.gateway_id: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Board is not attached to a gateway", - ) - gateway = await Gateway.objects.by_id(board.gateway_id).first(session) - if gateway is None or not gateway.url: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Gateway is not configured for this board", - ) - main_session_key = gateway_agent_session_key(gateway) - config = GatewayClientConfig(url=gateway.url, token=gateway.token) - - 
correlation = payload.correlation_id.strip() if payload.correlation_id else "" - correlation_line = f"Correlation ID: {correlation}\n" if correlation else "" - preferred_channel = (payload.preferred_channel or "").strip() - channel_line = f"Preferred channel: {preferred_channel}\n" if preferred_channel else "" - - tags = payload.reply_tags or ["gateway_main", "user_reply"] - tags_json = json.dumps(tags) - reply_source = payload.reply_source or "user_via_gateway_main" - base_url = settings.base_url or "http://localhost:8000" - - message = ( - "LEAD REQUEST: ASK USER\n" - f"Board: {board.name}\n" - f"Board ID: {board.id}\n" - f"From lead: {agent_ctx.agent.name}\n" - f"{correlation_line}" - f"{channel_line}\n" - f"{payload.content.strip()}\n\n" - "Please reach the user via your configured OpenClaw channel(s) " - "(Slack/SMS/etc).\n" - "If you cannot reach them there, post the question in Mission Control " - "board chat as a fallback.\n\n" - "When you receive the answer, reply in Mission Control by writing a " - "NON-chat memory item on this board:\n" - f"POST {base_url}/api/v1/agent/boards/{board.id}/memory\n" - f'Body: {{"content":"","tags":{tags_json},"source":"{reply_source}"}}\n' - "Do NOT reply in OpenClaw chat." 
- ) - - try: - await ensure_session(main_session_key, config=config, label="Gateway Agent") - await send_message(message, session_key=main_session_key, config=config, deliver=True) - except OpenClawGatewayError as exc: - record_activity( - session, - event_type="gateway.lead.ask_user.failed", - message=f"Lead user question failed for {board.name}: {exc}", - agent_id=agent_ctx.agent.id, - ) - await session.commit() - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc - - record_activity( - session, - event_type="gateway.lead.ask_user.sent", - message=f"Lead requested user info via gateway agent for board: {board.name}.", - agent_id=agent_ctx.agent.id, - ) - - main_agent = await Agent.objects.filter_by( - gateway_id=gateway.id, - board_id=None, - ).first(session) - - await session.commit() - - return GatewayMainAskUserResponse( - board_id=board.id, - main_agent_id=main_agent.id if main_agent else None, - main_agent_name=main_agent.name if main_agent else None, + coordination = GatewayCoordinationService(session) + return await coordination.ask_user_via_gateway_main( + board=board, + payload=payload, + actor_agent=agent_ctx.agent, ) @@ -829,76 +573,11 @@ async def message_gateway_board_lead( agent_ctx: AgentAuthContext = AGENT_CTX_DEP, ) -> GatewayLeadMessageResponse: """Send a gateway-main message to a single board lead agent.""" - import json - - gateway, config = await _require_gateway_main(session, agent_ctx.agent) - board = await _require_gateway_board(session, gateway=gateway, board_id=board_id) - lead, lead_created = await ensure_board_lead_agent( - session, - request=LeadAgentRequest( - board=board, - gateway=gateway, - config=config, - user=None, - options=LeadAgentOptions(action="provision"), - ), - ) - if not lead.openclaw_session_id: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Lead agent has no session key", - ) - - base_url = settings.base_url or "http://localhost:8000" 
- header = "GATEWAY MAIN QUESTION" if payload.kind == "question" else "GATEWAY MAIN HANDOFF" - correlation = payload.correlation_id.strip() if payload.correlation_id else "" - correlation_line = f"Correlation ID: {correlation}\n" if correlation else "" - tags = payload.reply_tags or ["gateway_main", "lead_reply"] - tags_json = json.dumps(tags) - reply_source = payload.reply_source or "lead_to_gateway_main" - - message = ( - f"{header}\n" - f"Board: {board.name}\n" - f"Board ID: {board.id}\n" - f"From agent: {agent_ctx.agent.name}\n" - f"{correlation_line}\n" - f"{payload.content.strip()}\n\n" - "Reply to the gateway agent by writing a NON-chat memory item on this board:\n" - f"POST {base_url}/api/v1/agent/boards/{board.id}/memory\n" - f'Body: {{"content":"...","tags":{tags_json},"source":"{reply_source}"}}\n' - "Do NOT reply in OpenClaw chat." - ) - - try: - await ensure_session(lead.openclaw_session_id, config=config, label=lead.name) - await send_message(message, session_key=lead.openclaw_session_id, config=config) - except OpenClawGatewayError as exc: - record_activity( - session, - event_type="gateway.main.lead_message.failed", - message=f"Lead message failed for {board.name}: {exc}", - agent_id=agent_ctx.agent.id, - ) - await session.commit() - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc - - record_activity( - session, - event_type="gateway.main.lead_message.sent", - message=f"Sent {payload.kind} to lead for board: {board.name}.", - agent_id=agent_ctx.agent.id, - ) - await session.commit() - - return GatewayLeadMessageResponse( - board_id=board.id, - lead_agent_id=lead.id, - lead_agent_name=lead.name, - lead_created=lead_created, + coordination = GatewayCoordinationService(session) + return await coordination.message_gateway_board_lead( + actor_agent=agent_ctx.agent, + board_id=board_id, + payload=payload, ) @@ -912,92 +591,8 @@ async def broadcast_gateway_lead_message( agent_ctx: AgentAuthContext = 
AGENT_CTX_DEP, ) -> GatewayLeadBroadcastResponse: """Broadcast a gateway-main message to multiple board leads.""" - import json - - gateway, config = await _require_gateway_main(session, agent_ctx.agent) - - statement = ( - select(Board) - .where(col(Board.gateway_id) == gateway.id) - .order_by(col(Board.created_at).desc()) - ) - if payload.board_ids: - statement = statement.where(col(Board.id).in_(payload.board_ids)) - boards = list(await session.exec(statement)) - - base_url = settings.base_url or "http://localhost:8000" - header = "GATEWAY MAIN QUESTION" if payload.kind == "question" else "GATEWAY MAIN HANDOFF" - correlation = payload.correlation_id.strip() if payload.correlation_id else "" - correlation_line = f"Correlation ID: {correlation}\n" if correlation else "" - tags = payload.reply_tags or ["gateway_main", "lead_reply"] - tags_json = json.dumps(tags) - reply_source = payload.reply_source or "lead_to_gateway_main" - - results: list[GatewayLeadBroadcastBoardResult] = [] - sent = 0 - failed = 0 - - async def _send_to_board(target_board: Board) -> GatewayLeadBroadcastBoardResult: - try: - lead, _lead_created = await ensure_board_lead_agent( - session, - request=LeadAgentRequest( - board=target_board, - gateway=gateway, - config=config, - user=None, - options=LeadAgentOptions(action="provision"), - ), - ) - lead_session_key = _require_lead_session_key(lead) - message = ( - f"{header}\n" - f"Board: {target_board.name}\n" - f"Board ID: {target_board.id}\n" - f"From agent: {agent_ctx.agent.name}\n" - f"{correlation_line}\n" - f"{payload.content.strip()}\n\n" - "Reply to the gateway agent by writing a NON-chat memory item " - "on this board:\n" - f"POST {base_url}/api/v1/agent/boards/{target_board.id}/memory\n" - f'Body: {{"content":"...","tags":{tags_json},' - f'"source":"{reply_source}"}}\n' - "Do NOT reply in OpenClaw chat." 
- ) - await ensure_session(lead_session_key, config=config, label=lead.name) - await send_message(message, session_key=lead_session_key, config=config) - return GatewayLeadBroadcastBoardResult( - board_id=target_board.id, - lead_agent_id=lead.id, - lead_agent_name=lead.name, - ok=True, - ) - except (HTTPException, OpenClawGatewayError, ValueError) as exc: - return GatewayLeadBroadcastBoardResult( - board_id=target_board.id, - ok=False, - error=str(exc), - ) - - for board in boards: - board_result = await _send_to_board(board) - results.append(board_result) - if board_result.ok: - sent += 1 - else: - failed += 1 - - record_activity( - session, - event_type="gateway.main.lead_broadcast.sent", - message=f"Broadcast {payload.kind} to {sent} board leads (failed: {failed}).", - agent_id=agent_ctx.agent.id, - ) - await session.commit() - - return GatewayLeadBroadcastResponse( - ok=True, - sent=sent, - failed=failed, - results=results, + coordination = GatewayCoordinationService(session) + return await coordination.broadcast_gateway_lead_message( + actor_agent=agent_ctx.agent, + payload=payload, ) diff --git a/backend/app/api/agents.py b/backend/app/api/agents.py index 814d3ac8..ac45c57b 100644 --- a/backend/app/api/agents.py +++ b/backend/app/api/agents.py @@ -1,35 +1,17 @@ -"""Agent lifecycle, listing, heartbeat, and deletion API endpoints.""" +"""Thin API wrappers for async agent lifecycle operations.""" from __future__ import annotations -import asyncio -import json -import re from dataclasses import dataclass -from datetime import UTC, datetime, timedelta -from typing import TYPE_CHECKING, Any -from uuid import UUID, uuid4 +from typing import TYPE_CHECKING +from uuid import UUID -from fastapi import APIRouter, Depends, HTTPException, Query, Request, status -from sqlalchemy import asc, or_ -from sqlmodel import col, select +from fastapi import APIRouter, Depends, Query, Request from sse_starlette.sse import EventSourceResponse from app.api.deps import ActorContext, 
require_admin_or_agent, require_org_admin -from app.core.agent_tokens import generate_agent_token, hash_agent_token from app.core.auth import AuthContext, get_auth_context -from app.core.time import utcnow -from app.db import crud -from app.db.pagination import paginate -from app.db.session import async_session_maker, get_session -from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, send_message -from app.models.activity_events import ActivityEvent -from app.models.agents import Agent -from app.models.boards import Board -from app.models.gateways import Gateway -from app.models.organizations import Organization -from app.models.tasks import Task +from app.db.session import get_session from app.schemas.agents import ( AgentCreate, AgentHeartbeat, @@ -39,40 +21,15 @@ from app.schemas.agents import ( ) from app.schemas.common import OkResponse from app.schemas.pagination import DefaultLimitOffsetPage -from app.services.activity_log import record_activity -from app.services.agent_provisioning import ( - DEFAULT_HEARTBEAT_CONFIG, - AgentProvisionRequest, - MainAgentProvisionRequest, - ProvisionOptions, - cleanup_agent, - provision_agent, - provision_main_agent, -) -from app.services.gateway_agents import gateway_agent_session_key -from app.services.organizations import ( - OrganizationContext, - get_active_membership, - has_board_access, - is_org_admin, - list_accessible_board_ids, - require_board_access, -) +from app.services.openclaw import AgentLifecycleService, AgentUpdateOptions +from app.services.organizations import OrganizationContext if TYPE_CHECKING: - from collections.abc import AsyncIterator, Sequence - from fastapi_pagination.limit_offset import LimitOffsetPage - from sqlalchemy.sql.elements import ColumnElement from sqlmodel.ext.asyncio.session import AsyncSession - from sqlmodel.sql.expression import SelectOfScalar - - from app.models.users 
import User router = APIRouter(prefix="/agents", tags=["agents"]) -OFFLINE_AFTER = timedelta(minutes=10) -AGENT_SESSION_PREFIX = "agent" BOARD_ID_QUERY = Query(default=None) GATEWAY_ID_QUERY = Query(default=None) SINCE_QUERY = Query(default=None) @@ -101,852 +58,6 @@ def _agent_update_params( AGENT_UPDATE_PARAMS_DEP = Depends(_agent_update_params) -def _parse_since(value: str | None) -> datetime | None: - if not value: - return None - normalized = value.strip() - if not normalized: - return None - normalized = normalized.replace("Z", "+00:00") - try: - parsed = datetime.fromisoformat(normalized) - except ValueError: - return None - if parsed.tzinfo is not None: - return parsed.astimezone(UTC).replace(tzinfo=None) - return parsed - - -def _slugify(value: str) -> str: - slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") - return slug or uuid4().hex - - -def _build_session_key(agent_name: str) -> str: - return f"{AGENT_SESSION_PREFIX}:{_slugify(agent_name)}:main" - - -def _workspace_path(agent_name: str, workspace_root: str | None) -> str: - if not workspace_root: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Gateway workspace_root is required", - ) - root = workspace_root.rstrip("/") - return f"{root}/workspace-{_slugify(agent_name)}" - - -async def _require_board( - session: AsyncSession, - board_id: UUID | str | None, - *, - user: User | None = None, - write: bool = False, -) -> Board: - if not board_id: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="board_id is required", - ) - board = await Board.objects.by_id(board_id).first(session) - if board is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Board not found", - ) - if user is not None: - await require_board_access(session, user=user, board=board, write=write) - return board - - -async def _require_gateway( - session: AsyncSession, - board: Board, -) -> tuple[Gateway, GatewayClientConfig]: - if 
not board.gateway_id: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Board gateway_id is required", - ) - gateway = await Gateway.objects.by_id(board.gateway_id).first(session) - if gateway is None: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Board gateway_id is invalid", - ) - if gateway.organization_id != board.organization_id: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Board gateway_id is invalid", - ) - if not gateway.url: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Gateway url is required", - ) - if not gateway.workspace_root: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Gateway workspace_root is required", - ) - return gateway, GatewayClientConfig(url=gateway.url, token=gateway.token) - - -def _gateway_client_config(gateway: Gateway) -> GatewayClientConfig: - if not gateway.url: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Gateway url is required", - ) - return GatewayClientConfig(url=gateway.url, token=gateway.token) - - -def _is_gateway_main(agent: Agent) -> bool: - return agent.board_id is None - - -def _to_agent_read(agent: Agent) -> AgentRead: - model = AgentRead.model_validate(agent, from_attributes=True) - return model.model_copy( - update={"is_gateway_main": _is_gateway_main(agent)}, - ) - - -def to_agent_read(agent: Agent) -> AgentRead: - """Convert an `Agent` model into its API read representation.""" - return _to_agent_read(agent) - - -def _coerce_agent_items(items: Sequence[Any]) -> list[Agent]: - agents: list[Agent] = [] - for item in items: - if not isinstance(item, Agent): - msg = "Expected Agent items from paginated query" - raise TypeError(msg) - agents.append(item) - return agents - - -async def _main_agent_gateway(session: AsyncSession, agent: Agent) -> Gateway | None: - if agent.board_id is not None: - 
return None - return await Gateway.objects.by_id(agent.gateway_id).first(session) - - -async def _ensure_gateway_session( - agent_name: str, - config: GatewayClientConfig, -) -> tuple[str, str | None]: - session_key = _build_session_key(agent_name) - try: - await ensure_session(session_key, config=config, label=agent_name) - except OpenClawGatewayError as exc: - return session_key, str(exc) - else: - return session_key, None - - -def _with_computed_status(agent: Agent) -> Agent: - now = utcnow() - if agent.status in {"deleting", "updating"}: - return agent - if agent.last_seen_at is None: - agent.status = "provisioning" - elif now - agent.last_seen_at > OFFLINE_AFTER: - agent.status = "offline" - return agent - - -def with_computed_status(agent: Agent) -> Agent: - """Apply transient online/offline status derivation to an agent model.""" - return _with_computed_status(agent) - - -def _serialize_agent(agent: Agent) -> dict[str, object]: - return _to_agent_read(_with_computed_status(agent)).model_dump( - mode="json", - ) - - -async def _fetch_agent_events( - session: AsyncSession, - board_id: UUID | None, - since: datetime, -) -> list[Agent]: - statement = select(Agent) - if board_id: - statement = statement.where(col(Agent.board_id) == board_id) - statement = statement.where( - or_( - col(Agent.updated_at) >= since, - col(Agent.last_seen_at) >= since, - ), - ).order_by(asc(col(Agent.updated_at))) - return list(await session.exec(statement)) - - -async def _require_user_context( - session: AsyncSession, - user: User | None, -) -> OrganizationContext: - if user is None: - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED) - member = await get_active_membership(session, user) - if member is None: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - organization = await Organization.objects.by_id(member.organization_id).first( - session, - ) - if organization is None: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - return 
OrganizationContext(organization=organization, member=member) - - -async def _require_agent_access( - session: AsyncSession, - *, - agent: Agent, - ctx: OrganizationContext, - write: bool, -) -> None: - if agent.board_id is None: - if not is_org_admin(ctx.member): - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - gateway = await _main_agent_gateway(session, agent) - if gateway is None or gateway.organization_id != ctx.organization.id: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - return - - board = await Board.objects.by_id(agent.board_id).first(session) - if board is None or board.organization_id != ctx.organization.id: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - if not await has_board_access(session, member=ctx.member, board=board, write=write): - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - - -def _record_heartbeat(session: AsyncSession, agent: Agent) -> None: - record_activity( - session, - event_type="agent.heartbeat", - message=f"Heartbeat received from {agent.name}.", - agent_id=agent.id, - ) - - -def _record_instruction_failure( - session: AsyncSession, - agent: Agent, - error: str, - action: str, -) -> None: - action_label = action.replace("_", " ").capitalize() - record_activity( - session, - event_type=f"agent.{action}.failed", - message=f"{action_label} message failed: {error}", - agent_id=agent.id, - ) - - -async def _coerce_agent_create_payload( - session: AsyncSession, - payload: AgentCreate, - actor: ActorContext, -) -> AgentCreate: - if actor.actor_type == "user": - ctx = await _require_user_context(session, actor.user) - if not is_org_admin(ctx.member): - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - return payload - - if actor.actor_type == "agent": - if not actor.agent or not actor.agent.is_board_lead: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Only board leads can create agents", - ) - if not actor.agent.board_id: - raise 
HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Board lead must be assigned to a board", - ) - if payload.board_id and payload.board_id != actor.agent.board_id: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Board leads can only create agents in their own board", - ) - return AgentCreate(**{**payload.model_dump(), "board_id": actor.agent.board_id}) - - return payload - - -async def _ensure_unique_agent_name( - session: AsyncSession, - *, - board: Board, - gateway: Gateway, - requested_name: str, -) -> None: - if not requested_name: - return - - existing = ( - await session.exec( - select(Agent) - .where(Agent.board_id == board.id) - .where(col(Agent.name).ilike(requested_name)), - ) - ).first() - if existing: - raise HTTPException( - status_code=status.HTTP_409_CONFLICT, - detail="An agent with this name already exists on this board.", - ) - - existing_gateway = ( - await session.exec( - select(Agent) - .join(Board, col(Agent.board_id) == col(Board.id)) - .where(col(Board.gateway_id) == gateway.id) - .where(col(Agent.name).ilike(requested_name)), - ) - ).first() - if existing_gateway: - raise HTTPException( - status_code=status.HTTP_409_CONFLICT, - detail=("An agent with this name already exists in this gateway " "workspace."), - ) - - desired_session_key = _build_session_key(requested_name) - existing_session_key = ( - await session.exec( - select(Agent) - .join(Board, col(Agent.board_id) == col(Board.id)) - .where(col(Board.gateway_id) == gateway.id) - .where(col(Agent.openclaw_session_id) == desired_session_key), - ) - ).first() - if existing_session_key: - raise HTTPException( - status_code=status.HTTP_409_CONFLICT, - detail=( - "This agent name would collide with an existing workspace " - "session key. Pick a different name." 
- ), - ) - - -async def _persist_new_agent( - session: AsyncSession, - *, - data: dict[str, Any], - client_config: GatewayClientConfig, -) -> tuple[Agent, str, str | None]: - agent = Agent.model_validate(data) - agent.status = "provisioning" - raw_token = generate_agent_token() - agent.agent_token_hash = hash_agent_token(raw_token) - if agent.heartbeat_config is None: - agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy() - agent.provision_requested_at = utcnow() - agent.provision_action = "provision" - session_key, session_error = await _ensure_gateway_session( - agent.name, - client_config, - ) - agent.openclaw_session_id = session_key - session.add(agent) - await session.commit() - await session.refresh(agent) - return agent, raw_token, session_error - - -async def _record_session_creation( - session: AsyncSession, - *, - agent: Agent, - session_error: str | None, -) -> None: - if session_error: - record_activity( - session, - event_type="agent.session.failed", - message=f"Session sync failed for {agent.name}: {session_error}", - agent_id=agent.id, - ) - else: - record_activity( - session, - event_type="agent.session.created", - message=f"Session created for {agent.name}.", - agent_id=agent.id, - ) - await session.commit() - - -async def _provision_new_agent( - session: AsyncSession, - *, - agent: Agent, - request: AgentProvisionRequest, - client_config: GatewayClientConfig, -) -> None: - try: - await provision_agent(agent, request) - await _send_wakeup_message(agent, client_config, verb="provisioned") - agent.provision_confirm_token_hash = None - agent.provision_requested_at = None - agent.provision_action = None - agent.updated_at = utcnow() - session.add(agent) - await session.commit() - record_activity( - session, - event_type="agent.provision", - message=f"Provisioned directly for {agent.name}.", - agent_id=agent.id, - ) - record_activity( - session, - event_type="agent.wakeup.sent", - message=f"Wakeup message sent to {agent.name}.", - 
agent_id=agent.id, - ) - await session.commit() - except OpenClawGatewayError as exc: - _record_instruction_failure(session, agent, str(exc), "provision") - await session.commit() - except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover - _record_instruction_failure(session, agent, str(exc), "provision") - await session.commit() - - -@dataclass(frozen=True, slots=True) -class _AgentUpdateProvisionTarget: - is_main_agent: bool - board: Board | None - gateway: Gateway - client_config: GatewayClientConfig - - -@dataclass(frozen=True, slots=True) -class _AgentUpdateProvisionRequest: - target: _AgentUpdateProvisionTarget - raw_token: str - user: User | None - force_bootstrap: bool - - -async def _validate_agent_update_inputs( - session: AsyncSession, - *, - ctx: OrganizationContext, - updates: dict[str, Any], - make_main: bool | None, -) -> None: - if make_main and not is_org_admin(ctx.member): - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - if "status" in updates: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="status is controlled by agent heartbeat", - ) - if "board_id" in updates and updates["board_id"] is not None: - new_board = await _require_board(session, updates["board_id"]) - if new_board.organization_id != ctx.organization.id: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - if not await has_board_access( - session, - member=ctx.member, - board=new_board, - write=True, - ): - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - - -async def _apply_agent_update_mutations( - session: AsyncSession, - *, - agent: Agent, - updates: dict[str, Any], - make_main: bool | None, -) -> tuple[Gateway | None, Gateway | None]: - main_gateway = await _main_agent_gateway(session, agent) - gateway_for_main: Gateway | None = None - - if make_main: - board_source = updates.get("board_id") or agent.board_id - board_for_main = await _require_board(session, board_source) - gateway_for_main, _ = await 
_require_gateway(session, board_for_main) - updates["board_id"] = None - updates["gateway_id"] = gateway_for_main.id - agent.is_board_lead = False - agent.openclaw_session_id = gateway_agent_session_key(gateway_for_main) - main_gateway = gateway_for_main - elif make_main is not None: - if "board_id" not in updates or updates["board_id"] is None: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="board_id is required when converting a gateway-main agent to board scope", - ) - board = await _require_board(session, updates["board_id"]) - if board.gateway_id is None: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Board gateway_id is required", - ) - updates["gateway_id"] = board.gateway_id - agent.openclaw_session_id = None - - if make_main is None and "board_id" in updates: - board = await _require_board(session, updates["board_id"]) - if board.gateway_id is None: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Board gateway_id is required", - ) - updates["gateway_id"] = board.gateway_id - for key, value in updates.items(): - setattr(agent, key, value) - - if make_main is None and main_gateway is not None: - agent.board_id = None - agent.gateway_id = main_gateway.id - agent.is_board_lead = False - if make_main is False and agent.board_id is not None: - board = await _require_board(session, agent.board_id) - if board.gateway_id is None: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Board gateway_id is required", - ) - agent.gateway_id = board.gateway_id - agent.updated_at = utcnow() - if agent.heartbeat_config is None: - agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy() - session.add(agent) - await session.commit() - await session.refresh(agent) - return main_gateway, gateway_for_main - - -async def _resolve_agent_update_target( - session: AsyncSession, - *, - agent: Agent, - make_main: bool | None, - main_gateway: 
Gateway | None, - gateway_for_main: Gateway | None, -) -> _AgentUpdateProvisionTarget: - if make_main: - if gateway_for_main is None: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Gateway agent requires a gateway configuration", - ) - return _AgentUpdateProvisionTarget( - is_main_agent=True, - board=None, - gateway=gateway_for_main, - client_config=_gateway_client_config(gateway_for_main), - ) - - if make_main is None and agent.board_id is None and main_gateway is not None: - return _AgentUpdateProvisionTarget( - is_main_agent=True, - board=None, - gateway=main_gateway, - client_config=_gateway_client_config(main_gateway), - ) - - if agent.board_id is None: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="board_id is required for non-main agents", - ) - board = await _require_board(session, agent.board_id) - gateway, client_config = await _require_gateway(session, board) - return _AgentUpdateProvisionTarget( - is_main_agent=False, - board=board, - gateway=gateway, - client_config=client_config, - ) - - -async def _ensure_agent_update_session( - session: AsyncSession, - *, - agent: Agent, - client_config: GatewayClientConfig, -) -> None: - session_key = agent.openclaw_session_id or _build_session_key(agent.name) - try: - await ensure_session(session_key, config=client_config, label=agent.name) - if not agent.openclaw_session_id: - agent.openclaw_session_id = session_key - session.add(agent) - await session.commit() - await session.refresh(agent) - except OpenClawGatewayError as exc: - _record_instruction_failure(session, agent, str(exc), "update") - await session.commit() - - -def _mark_agent_update_pending(agent: Agent) -> str: - raw_token = generate_agent_token() - agent.agent_token_hash = hash_agent_token(raw_token) - agent.provision_requested_at = utcnow() - agent.provision_action = "update" - agent.status = "updating" - return raw_token - - -async def _provision_updated_agent( - session: 
AsyncSession, - *, - agent: Agent, - request: _AgentUpdateProvisionRequest, -) -> None: - try: - if request.target.is_main_agent: - await provision_main_agent( - agent, - MainAgentProvisionRequest( - gateway=request.target.gateway, - auth_token=request.raw_token, - user=request.user, - session_key=agent.openclaw_session_id, - options=ProvisionOptions( - action="update", - force_bootstrap=request.force_bootstrap, - reset_session=True, - ), - ), - ) - else: - if request.target.board is None: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="board is required for non-main agent provisioning", - ) - await provision_agent( - agent, - AgentProvisionRequest( - board=request.target.board, - gateway=request.target.gateway, - auth_token=request.raw_token, - user=request.user, - options=ProvisionOptions( - action="update", - force_bootstrap=request.force_bootstrap, - reset_session=True, - ), - ), - ) - await _send_wakeup_message( - agent, - request.target.client_config, - verb="updated", - ) - agent.provision_confirm_token_hash = None - agent.provision_requested_at = None - agent.provision_action = None - agent.status = "online" - agent.updated_at = utcnow() - session.add(agent) - await session.commit() - record_activity( - session, - event_type="agent.update.direct", - message=f"Updated directly for {agent.name}.", - agent_id=agent.id, - ) - record_activity( - session, - event_type="agent.wakeup.sent", - message=f"Wakeup message sent to {agent.name}.", - agent_id=agent.id, - ) - await session.commit() - except OpenClawGatewayError as exc: - _record_instruction_failure(session, agent, str(exc), "update") - await session.commit() - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=f"Gateway update failed: {exc}", - ) from exc - except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover - _record_instruction_failure(session, agent, str(exc), "update") - await session.commit() - raise HTTPException( - 
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Unexpected error updating agent provisioning.", - ) from exc - - -def _heartbeat_lookup_statement(payload: AgentHeartbeatCreate) -> SelectOfScalar[Agent]: - statement = Agent.objects.filter_by(name=payload.name).statement - if payload.board_id is not None: - statement = statement.where(Agent.board_id == payload.board_id) - return statement - - -async def _create_agent_from_heartbeat( - session: AsyncSession, - *, - payload: AgentHeartbeatCreate, - actor: ActorContext, -) -> Agent: - if actor.actor_type == "agent": - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - if actor.actor_type == "user": - ctx = await _require_user_context(session, actor.user) - if not is_org_admin(ctx.member): - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - - board = await _require_board( - session, - payload.board_id, - user=actor.user, - write=True, - ) - gateway, client_config = await _require_gateway(session, board) - data: dict[str, Any] = { - "name": payload.name, - "board_id": board.id, - "gateway_id": gateway.id, - "heartbeat_config": DEFAULT_HEARTBEAT_CONFIG.copy(), - } - agent, raw_token, session_error = await _persist_new_agent( - session, - data=data, - client_config=client_config, - ) - await _record_session_creation( - session, - agent=agent, - session_error=session_error, - ) - await _provision_new_agent( - session, - agent=agent, - request=AgentProvisionRequest( - board=board, - gateway=gateway, - auth_token=raw_token, - user=actor.user, - options=ProvisionOptions(action="provision"), - ), - client_config=client_config, - ) - return agent - - -async def _handle_existing_user_heartbeat_agent( - session: AsyncSession, - *, - agent: Agent, - user: User | None, -) -> None: - ctx = await _require_user_context(session, user) - await _require_agent_access(session, agent=agent, ctx=ctx, write=True) - - if agent.agent_token_hash is not None: - return - - raw_token = generate_agent_token() - 
agent.agent_token_hash = hash_agent_token(raw_token) - if agent.heartbeat_config is None: - agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy() - agent.provision_requested_at = utcnow() - agent.provision_action = "provision" - session.add(agent) - await session.commit() - await session.refresh(agent) - board = await _require_board( - session, - str(agent.board_id) if agent.board_id else None, - user=user, - write=True, - ) - gateway, client_config = await _require_gateway(session, board) - await _provision_new_agent( - session, - agent=agent, - request=AgentProvisionRequest( - board=board, - gateway=gateway, - auth_token=raw_token, - user=user, - options=ProvisionOptions(action="provision"), - ), - client_config=client_config, - ) - - -async def _ensure_heartbeat_session_key( - session: AsyncSession, - *, - agent: Agent, - actor: ActorContext, -) -> None: - if agent.openclaw_session_id: - return - board = await _require_board( - session, - str(agent.board_id) if agent.board_id else None, - user=actor.user if actor.actor_type == "user" else None, - write=actor.actor_type == "user", - ) - _, client_config = await _require_gateway(session, board) - session_key, session_error = await _ensure_gateway_session( - agent.name, - client_config, - ) - agent.openclaw_session_id = session_key - session.add(agent) - await _record_session_creation( - session, - agent=agent, - session_error=session_error, - ) - - -async def _commit_heartbeat( - session: AsyncSession, - *, - agent: Agent, - status_value: str | None, -) -> AgentRead: - if status_value: - agent.status = status_value - elif agent.status == "provisioning": - agent.status = "online" - agent.last_seen_at = utcnow() - agent.updated_at = utcnow() - _record_heartbeat(session, agent) - session.add(agent) - await session.commit() - await session.refresh(agent) - return _to_agent_read(_with_computed_status(agent)) - - -async def _send_wakeup_message( - agent: Agent, - config: GatewayClientConfig, - verb: str = 
"provisioned", -) -> None: - session_key = agent.openclaw_session_id or _build_session_key(agent.name) - await ensure_session(session_key, config=config, label=agent.name) - message = ( - f"Hello {agent.name}. Your workspace has been {verb}.\n\n" - "Start the agent, run BOOT.md, and if BOOTSTRAP.md exists run it once " - "then delete it. Begin heartbeats after startup." - ) - await send_message(message, session_key=session_key, config=config, deliver=True) - - @router.get("", response_model=DefaultLimitOffsetPage[AgentRead]) async def list_agents( board_id: UUID | None = BOARD_ID_QUERY, @@ -955,48 +66,12 @@ async def list_agents( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> LimitOffsetPage[AgentRead]: """List agents visible to the active organization admin.""" - board_ids = await list_accessible_board_ids(session, member=ctx.member, write=False) - if board_id is not None and board_id not in set(board_ids): - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - base_filters: list[ColumnElement[bool]] = [] - if board_ids: - base_filters.append(col(Agent.board_id).in_(board_ids)) - if is_org_admin(ctx.member): - gateways = await Gateway.objects.filter_by( - organization_id=ctx.organization.id, - ).all(session) - gateway_ids = [gateway.id for gateway in gateways] - if gateway_ids: - base_filters.append( - (col(Agent.gateway_id).in_(gateway_ids)) & (col(Agent.board_id).is_(None)), - ) - if base_filters: - if len(base_filters) == 1: - statement = select(Agent).where(base_filters[0]) - else: - statement = select(Agent).where(or_(*base_filters)) - else: - statement = select(Agent).where(col(Agent.id).is_(None)) - if board_id is not None: - statement = statement.where(col(Agent.board_id) == board_id) - if gateway_id is not None: - gateway = await Gateway.objects.by_id(gateway_id).first(session) - if gateway is None or gateway.organization_id != ctx.organization.id: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - gateway_board_ids = 
select(Board.id).where(col(Board.gateway_id) == gateway_id) - statement = statement.where( - or_( - col(Agent.board_id).in_(gateway_board_ids), - (col(Agent.gateway_id) == gateway_id) & (col(Agent.board_id).is_(None)), - ), - ) - statement = statement.order_by(col(Agent.created_at).desc()) - - def _transform(items: Sequence[Any]) -> Sequence[Any]: - agents = _coerce_agent_items(items) - return [_to_agent_read(_with_computed_status(agent)) for agent in agents] - - return await paginate(session, statement, transformer=_transform) + service = AgentLifecycleService(session) + return await service.list_agents( + board_id=board_id, + gateway_id=gateway_id, + ctx=ctx, + ) @router.get("/stream") @@ -1008,38 +83,13 @@ async def stream_agents( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> EventSourceResponse: """Stream agent updates as SSE events.""" - since_dt = _parse_since(since) or utcnow() - last_seen = since_dt - board_ids = await list_accessible_board_ids(session, member=ctx.member, write=False) - allowed_ids = set(board_ids) - if board_id is not None and board_id not in allowed_ids: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - - async def event_generator() -> AsyncIterator[dict[str, str]]: - nonlocal last_seen - while True: - if await request.is_disconnected(): - break - async with async_session_maker() as stream_session: - if board_id is not None: - agents = await _fetch_agent_events( - stream_session, - board_id, - last_seen, - ) - elif allowed_ids: - agents = await _fetch_agent_events(stream_session, None, last_seen) - agents = [agent for agent in agents if agent.board_id in allowed_ids] - else: - agents = [] - for agent in agents: - updated_at = agent.updated_at or agent.last_seen_at or utcnow() - last_seen = max(updated_at, last_seen) - payload = {"agent": _serialize_agent(agent)} - yield {"event": "agent", "data": json.dumps(payload)} - await asyncio.sleep(2) - - return EventSourceResponse(event_generator(), ping=15) + service = 
AgentLifecycleService(session) + return await service.stream_agents( + request=request, + board_id=board_id, + since=since, + ctx=ctx, + ) @router.post("", response_model=AgentRead) @@ -1049,48 +99,8 @@ async def create_agent( actor: ActorContext = ACTOR_DEP, ) -> AgentRead: """Create and provision an agent.""" - payload = await _coerce_agent_create_payload(session, payload, actor) - - board = await _require_board( - session, - payload.board_id, - user=actor.user if actor.actor_type == "user" else None, - write=actor.actor_type == "user", - ) - gateway, client_config = await _require_gateway(session, board) - data = payload.model_dump() - data["gateway_id"] = gateway.id - requested_name = (data.get("name") or "").strip() - await _ensure_unique_agent_name( - session, - board=board, - gateway=gateway, - requested_name=requested_name, - ) - agent, raw_token, session_error = await _persist_new_agent( - session, - data=data, - client_config=client_config, - ) - await _record_session_creation( - session, - agent=agent, - session_error=session_error, - ) - provision_request = AgentProvisionRequest( - board=board, - gateway=gateway, - auth_token=raw_token, - user=actor.user if actor.actor_type == "user" else None, - options=ProvisionOptions(action="provision"), - ) - await _provision_new_agent( - session, - agent=agent, - request=provision_request, - client_config=client_config, - ) - return _to_agent_read(_with_computed_status(agent)) + service = AgentLifecycleService(session) + return await service.create_agent(payload=payload, actor=actor) @router.get("/{agent_id}", response_model=AgentRead) @@ -1100,11 +110,8 @@ async def get_agent( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> AgentRead: """Get a single agent by id.""" - agent = await Agent.objects.by_id(agent_id).first(session) - if agent is None: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - await _require_agent_access(session, agent=agent, ctx=ctx, write=False) - return 
_to_agent_read(_with_computed_status(agent)) + service = AgentLifecycleService(session) + return await service.get_agent(agent_id=agent_id, ctx=ctx) @router.patch("/{agent_id}", response_model=AgentRead) @@ -1115,54 +122,16 @@ async def update_agent( session: AsyncSession = SESSION_DEP, ) -> AgentRead: """Update agent metadata and optionally reprovision.""" - agent = await Agent.objects.by_id(agent_id).first(session) - if agent is None: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - await _require_agent_access(session, agent=agent, ctx=params.ctx, write=True) - updates = payload.model_dump(exclude_unset=True) - make_main = updates.pop("is_gateway_main", None) - await _validate_agent_update_inputs( - session, - ctx=params.ctx, - updates=updates, - make_main=make_main, + service = AgentLifecycleService(session) + return await service.update_agent( + agent_id=agent_id, + payload=payload, + options=AgentUpdateOptions( + force=params.force, + user=params.auth.user, + context=params.ctx, + ), ) - if not updates and not params.force and make_main is None: - return _to_agent_read(_with_computed_status(agent)) - main_gateway, gateway_for_main = await _apply_agent_update_mutations( - session, - agent=agent, - updates=updates, - make_main=make_main, - ) - target = await _resolve_agent_update_target( - session, - agent=agent, - make_main=make_main, - main_gateway=main_gateway, - gateway_for_main=gateway_for_main, - ) - await _ensure_agent_update_session( - session, - agent=agent, - client_config=target.client_config, - ) - raw_token = _mark_agent_update_pending(agent) - session.add(agent) - await session.commit() - await session.refresh(agent) - provision_request = _AgentUpdateProvisionRequest( - target=target, - raw_token=raw_token, - user=params.auth.user, - force_bootstrap=params.force, - ) - await _provision_updated_agent( - session, - agent=agent, - request=provision_request, - ) - return _to_agent_read(_with_computed_status(agent)) 
@router.post("/{agent_id}/heartbeat", response_model=AgentRead) @@ -1173,21 +142,8 @@ async def heartbeat_agent( actor: ActorContext = ACTOR_DEP, ) -> AgentRead: """Record a heartbeat for a specific agent.""" - agent = await Agent.objects.by_id(agent_id).first(session) - if agent is None: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - if actor.actor_type == "agent" and actor.agent and actor.agent.id != agent.id: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - if actor.actor_type == "user": - ctx = await _require_user_context(session, actor.user) - if not is_org_admin(ctx.member): - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - await _require_agent_access(session, agent=agent, ctx=ctx, write=True) - return await _commit_heartbeat( - session, - agent=agent, - status_value=payload.status, - ) + service = AgentLifecycleService(session) + return await service.heartbeat_agent(agent_id=agent_id, payload=payload, actor=actor) @router.post("/heartbeat", response_model=AgentRead) @@ -1197,42 +153,8 @@ async def heartbeat_or_create_agent( actor: ActorContext = ACTOR_DEP, ) -> AgentRead: """Heartbeat an existing agent or create/provision one if needed.""" - # Agent tokens must heartbeat their authenticated agent record. - # Names are not unique. 
- if actor.actor_type == "agent" and actor.agent: - return await heartbeat_agent( - agent_id=str(actor.agent.id), - payload=AgentHeartbeat(status=payload.status), - session=session, - actor=actor, - ) - - agent = (await session.exec(_heartbeat_lookup_statement(payload))).first() - if agent is None: - agent = await _create_agent_from_heartbeat( - session, - payload=payload, - actor=actor, - ) - elif actor.actor_type == "user": - await _handle_existing_user_heartbeat_agent( - session, - agent=agent, - user=actor.user, - ) - elif actor.actor_type == "agent" and actor.agent and actor.agent.id != agent.id: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - - await _ensure_heartbeat_session_key( - session, - agent=agent, - actor=actor, - ) - return await _commit_heartbeat( - session, - agent=agent, - status_value=payload.status, - ) + service = AgentLifecycleService(session) + return await service.heartbeat_or_create_agent(payload=payload, actor=actor) @router.delete("/{agent_id}", response_model=OkResponse) @@ -1242,91 +164,5 @@ async def delete_agent( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> OkResponse: """Delete an agent and clean related task state.""" - agent = await Agent.objects.by_id(agent_id).first(session) - if agent is None: - return OkResponse() - await _require_agent_access(session, agent=agent, ctx=ctx, write=True) - - board = await _require_board( - session, - str(agent.board_id) if agent.board_id else None, - ) - gateway, client_config = await _require_gateway(session, board) - try: - workspace_path = await cleanup_agent(agent, gateway) - except OpenClawGatewayError as exc: - _record_instruction_failure(session, agent, str(exc), "delete") - await session.commit() - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=f"Gateway cleanup failed: {exc}", - ) from exc - except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover - _record_instruction_failure(session, agent, str(exc), "delete") - await 
session.commit() - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Workspace cleanup failed: {exc}", - ) from exc - - record_activity( - session, - event_type="agent.delete.direct", - message=f"Deleted agent {agent.name}.", - agent_id=None, - ) - now = utcnow() - await crud.update_where( - session, - Task, - col(Task.assigned_agent_id) == agent.id, - col(Task.status) == "in_progress", - assigned_agent_id=None, - status="inbox", - in_progress_at=None, - updated_at=now, - commit=False, - ) - await crud.update_where( - session, - Task, - col(Task.assigned_agent_id) == agent.id, - col(Task.status) != "in_progress", - assigned_agent_id=None, - updated_at=now, - commit=False, - ) - await crud.update_where( - session, - ActivityEvent, - col(ActivityEvent.agent_id) == agent.id, - agent_id=None, - commit=False, - ) - await session.delete(agent) - await session.commit() - - # Always ask the gateway agent to confirm workspace cleanup. - try: - main_session = gateway_agent_session_key(gateway) - if main_session and workspace_path: - cleanup_message = ( - "Cleanup request for deleted agent.\n\n" - f"Agent name: {agent.name}\n" - f"Agent id: {agent.id}\n" - f"Workspace path: {workspace_path}\n\n" - "Actions:\n" - "1) Remove the workspace directory.\n" - "2) Reply NO_REPLY.\n" - ) - await ensure_session(main_session, config=client_config, label="Gateway Agent") - await send_message( - cleanup_message, - session_key=main_session, - config=client_config, - deliver=False, - ) - except (OSError, OpenClawGatewayError, ValueError): - # Cleanup request is best-effort; deletion already completed. 
- pass - return OkResponse() + service = AgentLifecycleService(session) + return await service.delete_agent(agent_id=agent_id, ctx=ctx) diff --git a/backend/app/api/board_group_memory.py b/backend/app/api/board_group_memory.py index 659e63a8..91b21789 100644 --- a/backend/app/api/board_group_memory.py +++ b/backend/app/api/board_group_memory.py @@ -25,17 +25,16 @@ from app.core.config import settings from app.core.time import utcnow from app.db.pagination import paginate from app.db.session import async_session_maker, get_session -from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, send_message +from app.integrations.openclaw_gateway import OpenClawGatewayError from app.models.agents import Agent from app.models.board_group_memory import BoardGroupMemory from app.models.board_groups import BoardGroup from app.models.boards import Board -from app.models.gateways import Gateway from app.models.users import User from app.schemas.board_group_memory import BoardGroupMemoryCreate, BoardGroupMemoryRead from app.schemas.pagination import DefaultLimitOffsetPage from app.services.mentions import extract_mentions, matches_agent_mention +from app.services.openclaw import optional_gateway_config_for_board, send_gateway_agent_message from app.services.organizations import ( is_org_admin, list_accessible_board_ids, @@ -95,30 +94,6 @@ def _serialize_memory(memory: BoardGroupMemory) -> dict[str, object]: ).model_dump(mode="json") -async def _gateway_config( - session: AsyncSession, - board: Board, -) -> GatewayClientConfig | None: - if board.gateway_id is None: - return None - gateway = await Gateway.objects.by_id(board.gateway_id).first(session) - if gateway is None or not gateway.url: - return None - return GatewayClientConfig(url=gateway.url, token=gateway.token) - - -async def _send_agent_message( - *, - session_key: str, - config: GatewayClientConfig, - 
agent_name: str, - message: str, - deliver: bool = False, -) -> None: - await ensure_session(session_key, config=config, label=agent_name) - await send_message(message, session_key=session_key, config=config, deliver=deliver) - - async def _fetch_memory_events( session: AsyncSession, board_group_id: UUID, @@ -249,7 +224,7 @@ async def _notify_group_target( board = context.board_by_id.get(board_id) if board is None: return - config = await _gateway_config(context.session, board) + config = await optional_gateway_config_for_board(context.session, board) if config is None: return header = _group_header( @@ -266,7 +241,7 @@ async def _notify_group_target( 'Body: {"content":"...","tags":["chat"]}' ) try: - await _send_agent_message( + await send_gateway_agent_message( session_key=session_key, config=config, agent_name=agent.name, diff --git a/backend/app/api/board_groups.py b/backend/app/api/board_groups.py index db6b0166..76f87b01 100644 --- a/backend/app/api/board_groups.py +++ b/backend/app/api/board_groups.py @@ -29,8 +29,8 @@ from app.schemas.board_groups import BoardGroupCreate, BoardGroupRead, BoardGrou from app.schemas.common import OkResponse from app.schemas.pagination import DefaultLimitOffsetPage from app.schemas.view_models import BoardGroupSnapshot -from app.services.agent_provisioning import DEFAULT_HEARTBEAT_CONFIG, sync_gateway_agent_heartbeats from app.services.board_group_snapshot import build_group_snapshot +from app.services.openclaw import DEFAULT_HEARTBEAT_CONFIG, sync_gateway_agent_heartbeats from app.services.organizations import ( OrganizationContext, board_access_filter, diff --git a/backend/app/api/board_memory.py b/backend/app/api/board_memory.py index 006aa1ab..bc00951e 100644 --- a/backend/app/api/board_memory.py +++ b/backend/app/api/board_memory.py @@ -24,13 +24,13 @@ from app.core.time import utcnow from app.db.pagination import paginate from app.db.session import async_session_maker, get_session from app.integrations.openclaw_gateway 
import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, send_message +from app.integrations.openclaw_gateway import OpenClawGatewayError from app.models.agents import Agent from app.models.board_memory import BoardMemory -from app.models.gateways import Gateway from app.schemas.board_memory import BoardMemoryCreate, BoardMemoryRead from app.schemas.pagination import DefaultLimitOffsetPage from app.services.mentions import extract_mentions, matches_agent_mention +from app.services.openclaw import optional_gateway_config_for_board, send_gateway_agent_message if TYPE_CHECKING: from collections.abc import AsyncIterator @@ -75,30 +75,6 @@ def _serialize_memory(memory: BoardMemory) -> dict[str, object]: ).model_dump(mode="json") -async def _gateway_config( - session: AsyncSession, - board: Board, -) -> GatewayClientConfig | None: - if board.gateway_id is None: - return None - gateway = await Gateway.objects.by_id(board.gateway_id).first(session) - if gateway is None or not gateway.url: - return None - return GatewayClientConfig(url=gateway.url, token=gateway.token) - - -async def _send_agent_message( - *, - session_key: str, - config: GatewayClientConfig, - agent_name: str, - message: str, - deliver: bool = False, -) -> None: - await ensure_session(session_key, config=config, label=agent_name) - await send_message(message, session_key=session_key, config=config, deliver=deliver) - - async def _fetch_memory_events( session: AsyncSession, board_id: UUID, @@ -138,7 +114,7 @@ async def _send_control_command( if not agent.openclaw_session_id: continue try: - await _send_agent_message( + await send_gateway_agent_message( session_key=agent.openclaw_session_id, config=config, agent_name=agent.name, @@ -184,7 +160,7 @@ async def _notify_chat_targets( ) -> None: if not memory.content: return - config = await _gateway_config(session, board) + config = await optional_gateway_config_for_board(session, board) if 
config is None: return @@ -230,7 +206,7 @@ async def _notify_chat_targets( 'Body: {"content":"...","tags":["chat"]}' ) try: - await _send_agent_message( + await send_gateway_agent_message( session_key=agent.openclaw_session_id, config=config, agent_name=agent.name, diff --git a/backend/app/api/board_onboarding.py b/backend/app/api/board_onboarding.py index 4ddfd752..183367e8 100644 --- a/backend/app/api/board_onboarding.py +++ b/backend/app/api/board_onboarding.py @@ -20,8 +20,6 @@ from app.api.deps import ( from app.core.config import settings from app.core.time import utcnow from app.db.session import get_session -from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, send_message from app.models.board_onboarding import BoardOnboardingSession from app.models.gateways import Gateway from app.schemas.board_onboarding import ( @@ -35,8 +33,13 @@ from app.schemas.board_onboarding import ( BoardOnboardingUserProfile, ) from app.schemas.boards import BoardRead -from app.services.board_leads import LeadAgentOptions, LeadAgentRequest, ensure_board_lead_agent -from app.services.gateway_agents import gateway_agent_session_key +from app.services.openclaw import ( + BoardOnboardingMessagingService, + LeadAgentOptions, + LeadAgentRequest, + ensure_board_lead_agent, + require_gateway_config_for_board, +) if TYPE_CHECKING: from sqlmodel.ext.asyncio.session import AsyncSession @@ -54,18 +57,6 @@ ACTOR_DEP = Depends(require_admin_or_agent) ADMIN_AUTH_DEP = Depends(require_admin_auth) -async def _gateway_config( - session: AsyncSession, - board: Board, -) -> tuple[Gateway, GatewayClientConfig]: - if not board.gateway_id: - raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY) - gateway = await Gateway.objects.by_id(board.gateway_id).first(session) - if gateway is None or not gateway.url: - raise 
HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY) - return gateway, GatewayClientConfig(url=gateway.url, token=gateway.token) - - def _parse_draft_user_profile( draft_goal: object, ) -> BoardOnboardingUserProfile | None: @@ -178,8 +169,7 @@ async def start_onboarding( if onboarding: return onboarding - gateway, config = await _gateway_config(session, board) - session_key = gateway_agent_session_key(gateway) + dispatcher = BoardOnboardingMessagingService(session) base_url = settings.base_url or "http://localhost:8000" prompt = ( "BOARD ONBOARDING REQUEST\n\n" @@ -246,19 +236,11 @@ async def start_onboarding( "working style.\n" ) - try: - await ensure_session(session_key, config=config, label="Gateway Agent") - await send_message( - prompt, - session_key=session_key, - config=config, - deliver=False, - ) - except OpenClawGatewayError as exc: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc + session_key = await dispatcher.dispatch_start_prompt( + board=board, + prompt=prompt, + correlation_id=f"onboarding.start:{board.id}", + ) onboarding = BoardOnboardingSession( board_id=board.id, @@ -289,7 +271,7 @@ async def answer_onboarding( if onboarding is None: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) - _, config = await _gateway_config(session, board) + dispatcher = BoardOnboardingMessagingService(session) answer_text = payload.answer if payload.other_text: answer_text = f"{payload.answer}: {payload.other_text}" @@ -299,19 +281,12 @@ async def answer_onboarding( {"role": "user", "content": answer_text, "timestamp": utcnow().isoformat()}, ) - try: - await ensure_session(onboarding.session_key, config=config, label="Gateway Agent") - await send_message( - answer_text, - session_key=onboarding.session_key, - config=config, - deliver=False, - ) - except OpenClawGatewayError as exc: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc + await 
dispatcher.dispatch_answer( + board=board, + onboarding=onboarding, + answer_text=answer_text, + correlation_id=f"onboarding.answer:{board.id}:{onboarding.id}", + ) onboarding.messages = messages onboarding.updated_at = utcnow() @@ -337,10 +312,7 @@ async def agent_onboarding_update( if board.gateway_id: gateway = await Gateway.objects.by_id(board.gateway_id).first(session) - if ( - gateway - and (agent.gateway_id != gateway.id or agent.board_id is not None) - ): + if gateway and (agent.gateway_id != gateway.id or agent.board_id is not None): raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) onboarding = ( @@ -421,7 +393,7 @@ async def confirm_onboarding( lead_agent = _parse_draft_lead_agent(onboarding.draft_goal) lead_options = _lead_agent_options(lead_agent) - gateway, config = await _gateway_config(session, board) + gateway, config = await require_gateway_config_for_board(session, board) session.add(board) session.add(onboarding) await session.commit() diff --git a/backend/app/api/boards.py b/backend/app/api/boards.py index 86991db3..0d2035ca 100644 --- a/backend/app/api/boards.py +++ b/backend/app/api/boards.py @@ -2,9 +2,8 @@ from __future__ import annotations -import re from typing import TYPE_CHECKING -from uuid import UUID, uuid4 +from uuid import UUID from fastapi import APIRouter, Depends, HTTPException, Query, status from sqlalchemy import func @@ -21,13 +20,7 @@ from app.core.time import utcnow from app.db import crud from app.db.pagination import paginate from app.db.session import get_session -from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import ( - OpenClawGatewayError, - delete_session, - ensure_session, - send_message, -) +from app.integrations.openclaw_gateway import OpenClawGatewayError from app.models.activity_events import ActivityEvent from app.models.agents import Agent from app.models.approvals import Approval @@ -47,7 +40,7 @@ from app.schemas.pagination 
import DefaultLimitOffsetPage from app.schemas.view_models import BoardGroupSnapshot, BoardSnapshot from app.services.board_group_snapshot import build_board_group_snapshot from app.services.board_snapshot import build_board_snapshot -from app.services.gateway_agents import gateway_agent_session_key +from app.services.openclaw import cleanup_agent from app.services.organizations import OrganizationContext, board_access_filter if TYPE_CHECKING: @@ -56,7 +49,6 @@ if TYPE_CHECKING: router = APIRouter(prefix="/boards", tags=["boards"]) -AGENT_SESSION_PREFIX = "agent" SESSION_DEP = Depends(get_session) ORG_ADMIN_DEP = Depends(require_org_admin) ORG_MEMBER_DEP = Depends(require_org_member) @@ -70,15 +62,6 @@ INCLUDE_DONE_QUERY = Query(default=False) PER_BOARD_TASK_LIMIT_QUERY = Query(default=5, ge=0, le=100) -def _slugify(value: str) -> str: - slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") - return slug or uuid4().hex - - -def _build_session_key(agent_name: str) -> str: - return f"{AGENT_SESSION_PREFIX}:{_slugify(agent_name)}:main" - - async def _require_gateway( session: AsyncSession, gateway_id: object, @@ -187,9 +170,9 @@ async def _apply_board_update( async def _board_gateway( session: AsyncSession, board: Board, -) -> tuple[Gateway | None, GatewayClientConfig | None]: +) -> Gateway | None: if not board.gateway_id: - return None, None + return None config = await Gateway.objects.by_id(board.gateway_id).first(session) if config is None: raise HTTPException( @@ -206,37 +189,7 @@ async def _board_gateway( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail="Gateway workspace_root is required", ) - return config, GatewayClientConfig(url=config.url, token=config.token) - - -async def _cleanup_agent_on_gateway( - agent: Agent, - config: Gateway, - client_config: GatewayClientConfig, -) -> None: - if agent.openclaw_session_id: - await delete_session(agent.openclaw_session_id, config=client_config) - main_session = gateway_agent_session_key(config) - 
workspace_root = config.workspace_root - workspace_path = f"{workspace_root.rstrip('/')}/workspace-{_slugify(agent.name)}" - cleanup_message = ( - "Cleanup request for deleted agent.\n\n" - f"Agent name: {agent.name}\n" - f"Agent id: {agent.id}\n" - f"Session key: {agent.openclaw_session_id or _build_session_key(agent.name)}\n" - f"Workspace path: {workspace_path}\n\n" - "Actions:\n" - "1) Remove the workspace directory.\n" - "2) Delete any lingering session artifacts.\n" - "Reply NO_REPLY." - ) - await ensure_session(main_session, config=client_config, label="Gateway Agent") - await send_message( - cleanup_message, - session_key=main_session, - config=client_config, - deliver=False, - ) + return config @router.get("", response_model=DefaultLimitOffsetPage[BoardRead]) @@ -330,11 +283,11 @@ async def delete_board( await session.exec(select(Task.id).where(Task.board_id == board.id)), ) - config, client_config = await _board_gateway(session, board) - if config and client_config: + config = await _board_gateway(session, board) + if config: try: for agent in agents: - await _cleanup_agent_on_gateway(agent, config, client_config) + await cleanup_agent(agent, config) except OpenClawGatewayError as exc: raise HTTPException( status_code=status.HTTP_502_BAD_GATEWAY, diff --git a/backend/app/api/gateway.py b/backend/app/api/gateway.py index 758dea53..c37b1a39 100644 --- a/backend/app/api/gateway.py +++ b/backend/app/api/gateway.py @@ -1,32 +1,19 @@ -"""Gateway inspection and session-management endpoints.""" +"""Thin gateway session-inspection API wrappers.""" from __future__ import annotations -from collections.abc import Iterable from typing import TYPE_CHECKING -from fastapi import APIRouter, Depends, HTTPException, Query, status -from sqlmodel import col +from fastapi import APIRouter, Depends, Query from app.api.deps import require_org_admin from app.core.auth import AuthContext, get_auth_context from app.db.session import get_session -from 
app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import ( - OpenClawGatewayError, - ensure_session, - get_chat_history, - openclaw_call, - send_message, -) from app.integrations.openclaw_gateway_protocol import ( GATEWAY_EVENTS, GATEWAY_METHODS, PROTOCOL_VERSION, ) -from app.models.agents import Agent -from app.models.boards import Board -from app.models.gateways import Gateway from app.schemas.common import OkResponse from app.schemas.gateway_api import ( GatewayCommandsResponse, @@ -37,13 +24,12 @@ from app.schemas.gateway_api import ( GatewaySessionsResponse, GatewaysStatusResponse, ) -from app.services.organizations import OrganizationContext, require_board_access +from app.services.openclaw import GatewaySessionService +from app.services.organizations import OrganizationContext if TYPE_CHECKING: from sqlmodel.ext.asyncio.session import AsyncSession - from app.models.users import User - router = APIRouter(prefix="/gateways", tags=["gateways"]) SESSION_DEP = Depends(get_session) AUTH_DEP = Depends(get_auth_context) @@ -56,7 +42,7 @@ def _query_to_resolve_input( gateway_url: str | None = Query(default=None), gateway_token: str | None = Query(default=None), ) -> GatewayResolveQuery: - return GatewayResolveQuery( + return GatewaySessionService.to_resolve_query( board_id=board_id, gateway_url=gateway_url, gateway_token=gateway_token, @@ -66,94 +52,6 @@ def _query_to_resolve_input( RESOLVE_INPUT_DEP = Depends(_query_to_resolve_input) -def _as_object_list(value: object) -> list[object]: - if value is None: - return [] - if isinstance(value, list): - return value - if isinstance(value, (tuple, set)): - return list(value) - if isinstance(value, (str, bytes, dict)): - return [] - if isinstance(value, Iterable): - return list(value) - return [] - - -async def _resolve_gateway( - session: AsyncSession, - params: GatewayResolveQuery, - *, - user: User | None = None, -) -> tuple[Board | None, 
GatewayClientConfig, str | None]: - if params.gateway_url: - return ( - None, - GatewayClientConfig(url=params.gateway_url, token=params.gateway_token), - None, - ) - if not params.board_id: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="board_id or gateway_url is required", - ) - board = await Board.objects.by_id(params.board_id).first(session) - if board is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Board not found", - ) - if user is not None: - await require_board_access(session, user=user, board=board, write=False) - if not board.gateway_id: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Board gateway_id is required", - ) - gateway = await Gateway.objects.by_id(board.gateway_id).first(session) - if gateway is None: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Board gateway_id is invalid", - ) - if not gateway.url: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Gateway url is required", - ) - main_agent = ( - await Agent.objects.filter_by(gateway_id=gateway.id) - .filter(col(Agent.board_id).is_(None)) - .first(session) - ) - main_session = main_agent.openclaw_session_id if main_agent else None - return ( - board, - GatewayClientConfig(url=gateway.url, token=gateway.token), - main_session, - ) - - -async def _require_gateway( - session: AsyncSession, - board_id: str | None, - *, - user: User | None = None, -) -> tuple[Board, GatewayClientConfig, str | None]: - params = GatewayResolveQuery(board_id=board_id) - board, config, main_session = await _resolve_gateway( - session, - params, - user=user, - ) - if board is None: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="board_id is required", - ) - return board, config, main_session - - @router.get("/status", response_model=GatewaysStatusResponse) async def gateways_status( params: 
GatewayResolveQuery = RESOLVE_INPUT_DEP, @@ -162,46 +60,12 @@ async def gateways_status( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> GatewaysStatusResponse: """Return gateway connectivity and session status.""" - board, config, main_session = await _resolve_gateway( - session, - params, + service = GatewaySessionService(session) + return await service.get_status( + params=params, + organization_id=ctx.organization.id, user=auth.user, ) - if board is not None and board.organization_id != ctx.organization.id: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - try: - sessions = await openclaw_call("sessions.list", config=config) - if isinstance(sessions, dict): - sessions_list = _as_object_list(sessions.get("sessions")) - else: - sessions_list = _as_object_list(sessions) - main_session_entry: object | None = None - main_session_error: str | None = None - if main_session: - try: - ensured = await ensure_session( - main_session, - config=config, - label="Gateway Agent", - ) - if isinstance(ensured, dict): - main_session_entry = ensured.get("entry") or ensured - except OpenClawGatewayError as exc: - main_session_error = str(exc) - return GatewaysStatusResponse( - connected=True, - gateway_url=config.url, - sessions_count=len(sessions_list), - sessions=sessions_list, - main_session=main_session_entry, - main_session_error=main_session_error, - ) - except OpenClawGatewayError as exc: - return GatewaysStatusResponse( - connected=False, - gateway_url=config.url, - error=str(exc), - ) @router.get("/sessions", response_model=GatewaySessionsResponse) @@ -212,67 +76,12 @@ async def list_gateway_sessions( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> GatewaySessionsResponse: """List sessions for a gateway associated with a board.""" - params = GatewayResolveQuery(board_id=board_id) - board, config, main_session = await _resolve_gateway( - session, - params, + service = GatewaySessionService(session) + return await service.get_sessions( + board_id=board_id, + 
organization_id=ctx.organization.id, user=auth.user, ) - if board is not None and board.organization_id != ctx.organization.id: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - try: - sessions = await openclaw_call("sessions.list", config=config) - except OpenClawGatewayError as exc: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc - if isinstance(sessions, dict): - sessions_list = _as_object_list(sessions.get("sessions")) - else: - sessions_list = _as_object_list(sessions) - - main_session_entry: object | None = None - if main_session: - try: - ensured = await ensure_session( - main_session, - config=config, - label="Gateway Agent", - ) - if isinstance(ensured, dict): - main_session_entry = ensured.get("entry") or ensured - except OpenClawGatewayError: - main_session_entry = None - - return GatewaySessionsResponse( - sessions=sessions_list, - main_session=main_session_entry, - ) - - -async def _list_sessions(config: GatewayClientConfig) -> list[dict[str, object]]: - sessions = await openclaw_call("sessions.list", config=config) - if isinstance(sessions, dict): - raw_items = _as_object_list(sessions.get("sessions")) - else: - raw_items = _as_object_list(sessions) - return [item for item in raw_items if isinstance(item, dict)] - - -async def _with_main_session( - sessions_list: list[dict[str, object]], - *, - config: GatewayClientConfig, - main_session: str | None, -) -> list[dict[str, object]]: - if not main_session or any(item.get("key") == main_session for item in sessions_list): - return sessions_list - try: - await ensure_session(main_session, config=config, label="Gateway Agent") - return await _list_sessions(config) - except OpenClawGatewayError: - return sessions_list @router.get("/sessions/{session_id}", response_model=GatewaySessionResponse) @@ -284,53 +93,16 @@ async def get_gateway_session( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> GatewaySessionResponse: """Get a specific gateway 
session by key.""" - params = GatewayResolveQuery(board_id=board_id) - board, config, main_session = await _resolve_gateway( - session, - params, + service = GatewaySessionService(session) + return await service.get_session( + session_id=session_id, + board_id=board_id, + organization_id=ctx.organization.id, user=auth.user, ) - if board is not None and board.organization_id != ctx.organization.id: - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) - try: - sessions_list = await _list_sessions(config) - except OpenClawGatewayError as exc: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc - sessions_list = await _with_main_session( - sessions_list, - config=config, - main_session=main_session, - ) - session_entry = next( - (item for item in sessions_list if item.get("key") == session_id), - None, - ) - if session_entry is None and main_session and session_id == main_session: - try: - ensured = await ensure_session( - main_session, - config=config, - label="Gateway Agent", - ) - if isinstance(ensured, dict): - session_entry = ensured.get("entry") or ensured - except OpenClawGatewayError: - session_entry = None - if session_entry is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Session not found", - ) - return GatewaySessionResponse(session=session_entry) -@router.get( - "/sessions/{session_id}/history", - response_model=GatewaySessionHistoryResponse, -) +@router.get("/sessions/{session_id}/history", response_model=GatewaySessionHistoryResponse) async def get_session_history( session_id: str, board_id: str | None = BOARD_ID_QUERY, @@ -339,19 +111,13 @@ async def get_session_history( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> GatewaySessionHistoryResponse: """Fetch chat history for a gateway session.""" - board, config, _ = await _require_gateway(session, board_id, user=auth.user) - if board.organization_id != ctx.organization.id: - raise 
HTTPException(status_code=status.HTTP_403_FORBIDDEN) - try: - history = await get_chat_history(session_id, config=config) - except OpenClawGatewayError as exc: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc - if isinstance(history, dict) and isinstance(history.get("messages"), list): - return GatewaySessionHistoryResponse(history=history["messages"]) - return GatewaySessionHistoryResponse(history=_as_object_list(history)) + service = GatewaySessionService(session) + return await service.get_session_history( + session_id=session_id, + board_id=board_id, + organization_id=ctx.organization.id, + user=auth.user, + ) @router.post("/sessions/{session_id}/message", response_model=OkResponse) @@ -363,23 +129,13 @@ async def send_gateway_session_message( auth: AuthContext = AUTH_DEP, ) -> OkResponse: """Send a message into a specific gateway session.""" - board, config, main_session = await _require_gateway( - session, - board_id, + service = GatewaySessionService(session) + await service.send_session_message( + session_id=session_id, + payload=payload, + board_id=board_id, user=auth.user, ) - if auth.user is None: - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED) - await require_board_access(session, user=auth.user, board=board, write=True) - try: - if main_session and session_id == main_session: - await ensure_session(main_session, config=config, label="Gateway Agent") - await send_message(payload.content, session_key=session_id, config=config) - except OpenClawGatewayError as exc: - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=str(exc), - ) from exc return OkResponse() diff --git a/backend/app/api/gateways.py b/backend/app/api/gateways.py index df2f38ab..a0c9e98a 100644 --- a/backend/app/api/gateways.py +++ b/backend/app/api/gateways.py @@ -1,34 +1,20 @@ -"""Gateway CRUD and template synchronization endpoints.""" +"""Thin API wrappers for gateway CRUD and template 
synchronization.""" from __future__ import annotations -import logging -from dataclasses import dataclass from typing import TYPE_CHECKING from uuid import UUID, uuid4 -from fastapi import APIRouter, Depends, HTTPException, Query, status +from fastapi import APIRouter, Depends, Query from sqlmodel import col from app.api.deps import require_org_admin -from app.core.agent_tokens import generate_agent_token, hash_agent_token from app.core.auth import AuthContext, get_auth_context -from app.core.time import utcnow from app.db import crud from app.db.pagination import paginate from app.db.session import get_session -from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import ( - OpenClawGatewayError, - ensure_session, - openclaw_call, - send_message, -) -from app.models.activity_events import ActivityEvent from app.models.agents import Agent -from app.models.approvals import Approval from app.models.gateways import Gateway -from app.models.tasks import Task from app.schemas.common import OkResponse from app.schemas.gateways import ( GatewayCreate, @@ -37,24 +23,12 @@ from app.schemas.gateways import ( GatewayUpdate, ) from app.schemas.pagination import DefaultLimitOffsetPage -from app.services.agent_provisioning import ( - DEFAULT_HEARTBEAT_CONFIG, - MainAgentProvisionRequest, - ProvisionOptions, - provision_main_agent, -) -from app.services.gateway_agents import ( - gateway_agent_session_key, - gateway_openclaw_agent_id, -) -from app.services.template_sync import GatewayTemplateSyncOptions -from app.services.template_sync import sync_gateway_templates as sync_gateway_templates_service +from app.services.openclaw import GatewayAdminLifecycleService, GatewayTemplateSyncQuery if TYPE_CHECKING: from fastapi_pagination.limit_offset import LimitOffsetPage from sqlmodel.ext.asyncio.session import AsyncSession - from app.models.users import User from app.services.organizations import OrganizationContext 
router = APIRouter(prefix="/gateways", tags=["gateways"]) @@ -67,16 +41,6 @@ ROTATE_TOKENS_QUERY = Query(default=False) FORCE_BOOTSTRAP_QUERY = Query(default=False) BOARD_ID_QUERY = Query(default=None) _RUNTIME_TYPE_REFERENCES = (UUID,) -logger = logging.getLogger(__name__) - - -@dataclass(frozen=True) -class _TemplateSyncQuery: - include_main: bool - reset_sessions: bool - rotate_tokens: bool - force_bootstrap: bool - board_id: UUID | None def _template_sync_query( @@ -86,8 +50,8 @@ def _template_sync_query( rotate_tokens: bool = ROTATE_TOKENS_QUERY, force_bootstrap: bool = FORCE_BOOTSTRAP_QUERY, board_id: UUID | None = BOARD_ID_QUERY, -) -> _TemplateSyncQuery: - return _TemplateSyncQuery( +) -> GatewayTemplateSyncQuery: + return GatewayTemplateSyncQuery( include_main=include_main, reset_sessions=reset_sessions, rotate_tokens=rotate_tokens, @@ -99,301 +63,15 @@ def _template_sync_query( SYNC_QUERY_DEP = Depends(_template_sync_query) -def _main_agent_name(gateway: Gateway) -> str: - return f"{gateway.name} Gateway Agent" - - -def _gateway_identity_profile() -> dict[str, str]: - return { - "role": "Gateway Agent", - "communication_style": "direct, concise, practical", - "emoji": ":compass:", - } - - -async def _require_gateway( - session: AsyncSession, - *, - gateway_id: UUID, - organization_id: UUID, -) -> Gateway: - gateway = ( - await Gateway.objects.by_id(gateway_id) - .filter(col(Gateway.organization_id) == organization_id) - .first(session) - ) - if gateway is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Gateway not found", - ) - return gateway - - -async def _find_main_agent( - session: AsyncSession, - gateway: Gateway, -) -> Agent | None: - return ( - await Agent.objects.filter_by(gateway_id=gateway.id) - .filter(col(Agent.board_id).is_(None)) - .first(session) - ) - - -async def _upsert_main_agent_record( - session: AsyncSession, - gateway: Gateway, -) -> tuple[Agent, bool]: - changed = False - session_key = 
gateway_agent_session_key(gateway) - agent = await _find_main_agent(session, gateway) - if agent is None: - agent = Agent( - name=_main_agent_name(gateway), - status="provisioning", - board_id=None, - gateway_id=gateway.id, - is_board_lead=False, - openclaw_session_id=session_key, - heartbeat_config=DEFAULT_HEARTBEAT_CONFIG.copy(), - identity_profile=_gateway_identity_profile(), - ) - session.add(agent) - changed = True - if agent.board_id is not None: - agent.board_id = None - changed = True - if agent.gateway_id != gateway.id: - agent.gateway_id = gateway.id - changed = True - if agent.is_board_lead: - agent.is_board_lead = False - changed = True - if agent.name != _main_agent_name(gateway): - agent.name = _main_agent_name(gateway) - changed = True - if agent.openclaw_session_id != session_key: - agent.openclaw_session_id = session_key - changed = True - if agent.heartbeat_config is None: - agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy() - changed = True - if agent.identity_profile is None: - agent.identity_profile = _gateway_identity_profile() - changed = True - if not agent.status: - agent.status = "provisioning" - changed = True - if changed: - agent.updated_at = utcnow() - session.add(agent) - return agent, changed - - -async def _ensure_gateway_agents_exist( - session: AsyncSession, - gateways: list[Gateway], -) -> None: - for gateway in gateways: - agent, gateway_changed = await _upsert_main_agent_record(session, gateway) - has_gateway_entry = await _gateway_has_main_agent_entry(gateway) - needs_provision = gateway_changed or not bool(agent.agent_token_hash) or not has_gateway_entry - if needs_provision: - await _provision_main_agent_record( - session, - gateway, - agent, - user=None, - action="provision", - notify=False, - ) - - -def _extract_agent_id_from_entry(item: object) -> str | None: - if isinstance(item, str): - value = item.strip() - return value or None - if not isinstance(item, dict): - return None - for key in ("id", "agentId", 
"agent_id"): - raw = item.get(key) - if isinstance(raw, str) and raw.strip(): - return raw.strip() - return None - - -def _extract_agents_list(payload: object) -> list[object]: - if isinstance(payload, list): - return [item for item in payload] - if not isinstance(payload, dict): - return [] - agents = payload.get("agents") or [] - if not isinstance(agents, list): - return [] - return [item for item in agents] - - -async def _gateway_has_main_agent_entry(gateway: Gateway) -> bool: - if not gateway.url: - return False - config = GatewayClientConfig(url=gateway.url, token=gateway.token) - target_id = gateway_openclaw_agent_id(gateway) - try: - payload = await openclaw_call("agents.list", config=config) - except OpenClawGatewayError: - # Avoid treating transient gateway connectivity issues as a missing agent entry. - return True - for item in _extract_agents_list(payload): - if _extract_agent_id_from_entry(item) == target_id: - return True - return False - - -async def _provision_main_agent_record( - session: AsyncSession, - gateway: Gateway, - agent: Agent, - *, - user: User | None, - action: str, - notify: bool, -) -> Agent: - session_key = gateway_agent_session_key(gateway) - raw_token = generate_agent_token() - agent.agent_token_hash = hash_agent_token(raw_token) - agent.provision_requested_at = utcnow() - agent.provision_action = action - agent.updated_at = utcnow() - if agent.heartbeat_config is None: - agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy() - session.add(agent) - await session.commit() - await session.refresh(agent) - if not gateway.url: - return agent - try: - await provision_main_agent( - agent, - MainAgentProvisionRequest( - gateway=gateway, - auth_token=raw_token, - user=user, - session_key=session_key, - options=ProvisionOptions(action=action), - ), - ) - await ensure_session( - session_key, - config=GatewayClientConfig(url=gateway.url, token=gateway.token), - label=agent.name, - ) - if notify: - await send_message( - ( - f"Hello 
{agent.name}. Your gateway provisioning was updated.\n\n" - "Please re-read AGENTS.md, USER.md, HEARTBEAT.md, and TOOLS.md. " - "If BOOTSTRAP.md exists, run it once then delete it. " - "Begin heartbeats after startup." - ), - session_key=session_key, - config=GatewayClientConfig(url=gateway.url, token=gateway.token), - deliver=True, - ) - except OpenClawGatewayError as exc: - logger.warning( - "gateway.main_agent.provision_failed_gateway gateway_id=%s agent_id=%s error=%s", - gateway.id, - agent.id, - str(exc), - ) - except (OSError, RuntimeError, ValueError) as exc: - logger.warning( - "gateway.main_agent.provision_failed gateway_id=%s agent_id=%s error=%s", - gateway.id, - agent.id, - str(exc), - ) - except Exception as exc: # pragma: no cover - defensive fallback - logger.warning( - "gateway.main_agent.provision_failed_unexpected gateway_id=%s agent_id=%s " - "error_type=%s error=%s", - gateway.id, - agent.id, - exc.__class__.__name__, - str(exc), - ) - return agent - - -async def _ensure_main_agent( - session: AsyncSession, - gateway: Gateway, - auth: AuthContext, - *, - action: str = "provision", -) -> Agent: - agent, _ = await _upsert_main_agent_record(session, gateway) - return await _provision_main_agent_record( - session, - gateway, - agent, - user=auth.user, - action=action, - notify=True, - ) - - -async def _clear_agent_foreign_keys( - session: AsyncSession, - *, - agent_id: UUID, -) -> None: - now = utcnow() - await crud.update_where( - session, - Task, - col(Task.assigned_agent_id) == agent_id, - col(Task.status) == "in_progress", - assigned_agent_id=None, - status="inbox", - in_progress_at=None, - updated_at=now, - commit=False, - ) - await crud.update_where( - session, - Task, - col(Task.assigned_agent_id) == agent_id, - col(Task.status) != "in_progress", - assigned_agent_id=None, - updated_at=now, - commit=False, - ) - await crud.update_where( - session, - ActivityEvent, - col(ActivityEvent.agent_id) == agent_id, - agent_id=None, - commit=False, - ) 
- await crud.update_where( - session, - Approval, - col(Approval.agent_id) == agent_id, - agent_id=None, - commit=False, - ) - - @router.get("", response_model=DefaultLimitOffsetPage[GatewayRead]) async def list_gateways( session: AsyncSession = SESSION_DEP, ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> LimitOffsetPage[GatewayRead]: """List gateways for the caller's organization.""" + service = GatewayAdminLifecycleService(session) gateways = await Gateway.objects.filter_by(organization_id=ctx.organization.id).all(session) - await _ensure_gateway_agents_exist(session, gateways) + await service.ensure_gateway_agents_exist(gateways) statement = ( Gateway.objects.filter_by(organization_id=ctx.organization.id) .order_by(col(Gateway.created_at).desc()) @@ -410,12 +88,13 @@ async def create_gateway( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> Gateway: """Create a gateway and provision or refresh its main agent.""" + service = GatewayAdminLifecycleService(session) data = payload.model_dump() gateway_id = uuid4() data["id"] = gateway_id data["organization_id"] = ctx.organization.id gateway = await crud.create(session, Gateway, **data) - await _ensure_main_agent(session, gateway, auth, action="provision") + await service.ensure_main_agent(gateway, auth, action="provision") return gateway @@ -426,12 +105,12 @@ async def get_gateway( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> Gateway: """Return one gateway by id for the caller's organization.""" - gateway = await _require_gateway( - session, + service = GatewayAdminLifecycleService(session) + gateway = await service.require_gateway( gateway_id=gateway_id, organization_id=ctx.organization.id, ) - await _ensure_gateway_agents_exist(session, [gateway]) + await service.ensure_gateway_agents_exist([gateway]) return gateway @@ -444,49 +123,32 @@ async def update_gateway( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> Gateway: """Patch a gateway and refresh the main-agent provisioning state.""" - gateway = await 
_require_gateway( - session, + service = GatewayAdminLifecycleService(session) + gateway = await service.require_gateway( gateway_id=gateway_id, organization_id=ctx.organization.id, ) updates = payload.model_dump(exclude_unset=True) await crud.patch(session, gateway, updates) - await _ensure_main_agent( - session, - gateway, - auth, - action="update", - ) + await service.ensure_main_agent(gateway, auth, action="update") return gateway @router.post("/{gateway_id}/templates/sync", response_model=GatewayTemplatesSyncResult) async def sync_gateway_templates( gateway_id: UUID, - sync_query: _TemplateSyncQuery = SYNC_QUERY_DEP, + sync_query: GatewayTemplateSyncQuery = SYNC_QUERY_DEP, session: AsyncSession = SESSION_DEP, auth: AuthContext = AUTH_DEP, ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> GatewayTemplatesSyncResult: """Sync templates for a gateway and optionally rotate runtime settings.""" - gateway = await _require_gateway( - session, + service = GatewayAdminLifecycleService(session) + gateway = await service.require_gateway( gateway_id=gateway_id, organization_id=ctx.organization.id, ) - await _ensure_gateway_agents_exist(session, [gateway]) - return await sync_gateway_templates_service( - session, - gateway, - GatewayTemplateSyncOptions( - user=auth.user, - include_main=sync_query.include_main, - reset_sessions=sync_query.reset_sessions, - rotate_tokens=sync_query.rotate_tokens, - force_bootstrap=sync_query.force_bootstrap, - board_id=sync_query.board_id, - ), - ) + return await service.sync_templates(gateway, query=sync_query, auth=auth) @router.delete("/{gateway_id}", response_model=OkResponse) @@ -496,14 +158,14 @@ async def delete_gateway( ctx: OrganizationContext = ORG_ADMIN_DEP, ) -> OkResponse: """Delete a gateway in the caller's organization.""" - gateway = await _require_gateway( - session, + service = GatewayAdminLifecycleService(session) + gateway = await service.require_gateway( gateway_id=gateway_id, organization_id=ctx.organization.id, ) - 
main_agent = await _find_main_agent(session, gateway) + main_agent = await service.find_main_agent(gateway) if main_agent is not None: - await _clear_agent_foreign_keys(session, agent_id=main_agent.id) + await service.clear_agent_foreign_keys(agent_id=main_agent.id) await session.delete(main_agent) duplicate_main_agents = await Agent.objects.filter_by( @@ -513,7 +175,7 @@ async def delete_gateway( for agent in duplicate_main_agents: if main_agent is not None and agent.id == main_agent.id: continue - await _clear_agent_foreign_keys(session, agent_id=agent.id) + await service.clear_agent_foreign_keys(agent_id=agent.id) await session.delete(agent) await session.delete(gateway) diff --git a/backend/app/api/tasks.py b/backend/app/api/tasks.py index 8db6fea9..a1f1e277 100644 --- a/backend/app/api/tasks.py +++ b/backend/app/api/tasks.py @@ -29,12 +29,11 @@ from app.db import crud from app.db.pagination import paginate from app.db.session import async_session_maker, get_session from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, send_message +from app.integrations.openclaw_gateway import OpenClawGatewayError from app.models.activity_events import ActivityEvent from app.models.agents import Agent from app.models.approvals import Approval from app.models.boards import Board -from app.models.gateways import Gateway from app.models.task_dependencies import TaskDependency from app.models.task_fingerprints import TaskFingerprint from app.models.tasks import Task @@ -44,6 +43,7 @@ from app.schemas.pagination import DefaultLimitOffsetPage from app.schemas.tasks import TaskCommentCreate, TaskCommentRead, TaskCreate, TaskRead, TaskUpdate from app.services.activity_log import record_activity from app.services.mentions import extract_mentions, matches_agent_mention +from app.services.openclaw import optional_gateway_config_for_board, send_gateway_agent_message from 
app.services.organizations import require_board_access from app.services.task_dependencies import ( blocked_by_dependency_ids, @@ -301,26 +301,19 @@ def _serialize_comment(event: ActivityEvent) -> dict[str, object]: return TaskCommentRead.model_validate(event).model_dump(mode="json") -async def _gateway_config( - session: AsyncSession, - board: Board, -) -> GatewayClientConfig | None: - if not board.gateway_id: - return None - gateway = await Gateway.objects.by_id(board.gateway_id).first(session) - if gateway is None or not gateway.url: - return None - return GatewayClientConfig(url=gateway.url, token=gateway.token) - - async def _send_lead_task_message( *, session_key: str, config: GatewayClientConfig, message: str, ) -> None: - await ensure_session(session_key, config=config, label="Lead Agent") - await send_message(message, session_key=session_key, config=config, deliver=False) + await send_gateway_agent_message( + session_key=session_key, + config=config, + agent_name="Lead Agent", + message=message, + deliver=False, + ) async def _send_agent_task_message( @@ -330,8 +323,13 @@ async def _send_agent_task_message( agent_name: str, message: str, ) -> None: - await ensure_session(session_key, config=config, label=agent_name) - await send_message(message, session_key=session_key, config=config, deliver=False) + await send_gateway_agent_message( + session_key=session_key, + config=config, + agent_name=agent_name, + message=message, + deliver=False, + ) async def _notify_agent_on_task_assign( @@ -343,7 +341,7 @@ async def _notify_agent_on_task_assign( ) -> None: if not agent.openclaw_session_id: return - config = await _gateway_config(session, board) + config = await optional_gateway_config_for_board(session, board) if config is None: return description = _truncate_snippet(task.description or "") @@ -415,7 +413,7 @@ async def _notify_lead_on_task_create( ) if lead is None or not lead.openclaw_session_id: return - config = await _gateway_config(session, board) + config 
= await optional_gateway_config_for_board(session, board) if config is None: return description = _truncate_snippet(task.description or "") @@ -470,7 +468,7 @@ async def _notify_lead_on_task_unassigned( ) if lead is None or not lead.openclaw_session_id: return - config = await _gateway_config(session, board) + config = await optional_gateway_config_for_board(session, board) if config is None: return description = _truncate_snippet(task.description or "") @@ -1029,7 +1027,7 @@ async def _notify_task_comment_targets( if request.task.board_id else None ) - config = await _gateway_config(session, board) if board else None + config = await optional_gateway_config_for_board(session, board) if board else None if not board or not config: return diff --git a/backend/app/services/board_leads.py b/backend/app/services/board_leads.py deleted file mode 100644 index 2f189bad..00000000 --- a/backend/app/services/board_leads.py +++ /dev/null @@ -1,159 +0,0 @@ -"""Helpers for ensuring each board has a provisioned lead agent.""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any - -from sqlmodel import col, select - -from app.core.agent_tokens import generate_agent_token, hash_agent_token -from app.core.time import utcnow -from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, send_message -from app.models.agents import Agent -from app.services.agent_provisioning import ( - DEFAULT_HEARTBEAT_CONFIG, - AgentProvisionRequest, - ProvisionOptions, - provision_agent, -) - -if TYPE_CHECKING: - from sqlmodel.ext.asyncio.session import AsyncSession - - from app.models.boards import Board - from app.models.gateways import Gateway - from app.models.users import User - - -def lead_session_key(board: Board) -> str: - """Return the deterministic main session key for a board lead agent.""" - return 
f"agent:lead-{board.id}:main" - - -def lead_agent_name(_: Board) -> str: - """Return the default display name for board lead agents.""" - return "Lead Agent" - - -@dataclass(frozen=True, slots=True) -class LeadAgentOptions: - """Optional overrides for board-lead provisioning behavior.""" - - agent_name: str | None = None - identity_profile: dict[str, str] | None = None - action: str = "provision" - - -@dataclass(frozen=True, slots=True) -class LeadAgentRequest: - """Inputs required to ensure or provision a board lead agent.""" - - board: Board - gateway: Gateway - config: GatewayClientConfig - user: User | None - options: LeadAgentOptions = field(default_factory=LeadAgentOptions) - - -async def ensure_board_lead_agent( - session: AsyncSession, - *, - request: LeadAgentRequest, -) -> tuple[Agent, bool]: - """Ensure a board has a lead agent; return `(agent, created)`.""" - board = request.board - config_options = request.options - existing = ( - await session.exec( - select(Agent) - .where(Agent.board_id == board.id) - .where(col(Agent.is_board_lead).is_(True)), - ) - ).first() - if existing: - desired_name = config_options.agent_name or lead_agent_name(board) - changed = False - if existing.name != desired_name: - existing.name = desired_name - changed = True - if existing.gateway_id != request.gateway.id: - existing.gateway_id = request.gateway.id - changed = True - desired_session_key = lead_session_key(board) - if not existing.openclaw_session_id: - existing.openclaw_session_id = desired_session_key - changed = True - if changed: - existing.updated_at = utcnow() - session.add(existing) - await session.commit() - await session.refresh(existing) - return existing, False - - merged_identity_profile: dict[str, Any] = { - "role": "Board Lead", - "communication_style": "direct, concise, practical", - "emoji": ":gear:", - } - if config_options.identity_profile: - merged_identity_profile.update( - { - key: value.strip() - for key, value in 
config_options.identity_profile.items() - if value.strip() - }, - ) - - agent = Agent( - name=config_options.agent_name or lead_agent_name(board), - status="provisioning", - board_id=board.id, - gateway_id=request.gateway.id, - is_board_lead=True, - heartbeat_config=DEFAULT_HEARTBEAT_CONFIG.copy(), - identity_profile=merged_identity_profile, - openclaw_session_id=lead_session_key(board), - provision_requested_at=utcnow(), - provision_action=config_options.action, - ) - raw_token = generate_agent_token() - agent.agent_token_hash = hash_agent_token(raw_token) - session.add(agent) - await session.commit() - await session.refresh(agent) - - try: - await provision_agent( - agent, - AgentProvisionRequest( - board=board, - gateway=request.gateway, - auth_token=raw_token, - user=request.user, - options=ProvisionOptions(action=config_options.action), - ), - ) - if agent.openclaw_session_id: - await ensure_session( - agent.openclaw_session_id, - config=request.config, - label=agent.name, - ) - await send_message( - ( - f"Hello {agent.name}. Your workspace has been provisioned.\n\n" - "Start the agent, run BOOT.md, and if BOOTSTRAP.md exists run " - "it once " - "then delete it. Begin heartbeats after startup." - ), - session_key=agent.openclaw_session_id, - config=request.config, - deliver=True, - ) - except OpenClawGatewayError: - # Best-effort provisioning. The board/agent rows should still exist. 
- pass - - return agent, True diff --git a/backend/app/services/board_snapshot.py b/backend/app/services/board_snapshot.py index 4df19252..d6fb1b59 100644 --- a/backend/app/services/board_snapshot.py +++ b/backend/app/services/board_snapshot.py @@ -2,22 +2,20 @@ from __future__ import annotations -from datetime import timedelta from typing import TYPE_CHECKING from sqlalchemy import case, func from sqlmodel import col, select -from app.core.time import utcnow from app.models.agents import Agent from app.models.approvals import Approval from app.models.board_memory import BoardMemory from app.models.tasks import Task -from app.schemas.agents import AgentRead from app.schemas.approvals import ApprovalRead from app.schemas.board_memory import BoardMemoryRead from app.schemas.boards import BoardRead from app.schemas.view_models import BoardSnapshot, TaskCardRead +from app.services.openclaw import AgentLifecycleService from app.services.task_dependencies import ( blocked_by_dependency_ids, dependency_ids_by_task_id, @@ -31,31 +29,6 @@ if TYPE_CHECKING: from app.models.boards import Board -OFFLINE_AFTER = timedelta(minutes=10) - - -def _computed_agent_status(agent: Agent) -> str: - now = utcnow() - if agent.status in {"deleting", "updating"}: - return agent.status - if agent.last_seen_at is None: - return "provisioning" - if now - agent.last_seen_at > OFFLINE_AFTER: - return "offline" - return agent.status - - -def _agent_to_read(agent: Agent) -> AgentRead: - model = AgentRead.model_validate(agent, from_attributes=True) - computed_status = _computed_agent_status(agent) - is_gateway_main = agent.gateway_id is not None and agent.board_id is None - return model.model_copy( - update={ - "status": computed_status, - "is_gateway_main": is_gateway_main, - }, - ) - def _memory_to_read(memory: BoardMemory) -> BoardMemoryRead: return BoardMemoryRead.model_validate(memory, from_attributes=True) @@ -125,7 +98,10 @@ async def build_board_snapshot(session: AsyncSession, board: Board) 
-> BoardSnap .order_by(col(Agent.created_at).desc()) .all(session) ) - agent_reads = [_agent_to_read(agent) for agent in agents] + agent_reads = [ + AgentLifecycleService.to_agent_read(AgentLifecycleService.with_computed_status(agent)) + for agent in agents + ] agent_name_by_id = {agent.id: agent.name for agent in agents} pending_approvals_count = int( diff --git a/backend/app/services/gateway_agents.py b/backend/app/services/gateway_agents.py deleted file mode 100644 index 0b9b082e..00000000 --- a/backend/app/services/gateway_agents.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Helpers for dedicated gateway-scoped agent identity/session keys.""" - -from __future__ import annotations - -from uuid import UUID - -from app.models.gateways import Gateway - -_GATEWAY_AGENT_PREFIX = "agent:gateway-" -_GATEWAY_AGENT_SUFFIX = ":main" -_GATEWAY_OPENCLAW_AGENT_PREFIX = "mc-gateway-" - - -def gateway_agent_session_key_for_id(gateway_id: UUID) -> str: - """Return the dedicated Mission Control gateway-agent session key for an id.""" - return f"{_GATEWAY_AGENT_PREFIX}{gateway_id}{_GATEWAY_AGENT_SUFFIX}" - - -def gateway_agent_session_key(gateway: Gateway) -> str: - """Return the dedicated Mission Control gateway-agent session key.""" - return gateway_agent_session_key_for_id(gateway.id) - - -def gateway_openclaw_agent_id_for_id(gateway_id: UUID) -> str: - """Return the dedicated OpenClaw config `agentId` for a gateway agent.""" - return f"{_GATEWAY_OPENCLAW_AGENT_PREFIX}{gateway_id}" - - -def gateway_openclaw_agent_id(gateway: Gateway) -> str: - """Return the dedicated OpenClaw config `agentId` for a gateway agent.""" - return gateway_openclaw_agent_id_for_id(gateway.id) diff --git a/backend/app/services/openclaw/__init__.py b/backend/app/services/openclaw/__init__.py new file mode 100644 index 00000000..0d8c3597 --- /dev/null +++ b/backend/app/services/openclaw/__init__.py @@ -0,0 +1,7 @@ +"""OpenClaw lifecycle services package.""" + +from .constants import * # noqa: F401,F403 +from 
.exceptions import * # noqa: F401,F403 +from .provisioning import * # noqa: F401,F403 +from .services import * # noqa: F401,F403 +from .shared import * # noqa: F401,F403 diff --git a/backend/app/services/openclaw/constants.py b/backend/app/services/openclaw/constants.py new file mode 100644 index 00000000..345f3eff --- /dev/null +++ b/backend/app/services/openclaw/constants.py @@ -0,0 +1,120 @@ +"""Shared constants for lifecycle orchestration services.""" + +from __future__ import annotations + +import random +import re +from datetime import timedelta +from typing import Any + +_GATEWAY_AGENT_PREFIX = "agent:gateway-" +_GATEWAY_AGENT_SUFFIX = ":main" +_GATEWAY_OPENCLAW_AGENT_PREFIX = "mc-gateway-" + +DEFAULT_HEARTBEAT_CONFIG: dict[str, Any] = { + "every": "10m", + "target": "none", + "includeReasoning": False, +} + +OFFLINE_AFTER = timedelta(minutes=10) +AGENT_SESSION_PREFIX = "agent" + +DEFAULT_CHANNEL_HEARTBEAT_VISIBILITY: dict[str, bool] = { + # Suppress routine HEARTBEAT_OK delivery by default. + "showOk": False, + "showAlerts": True, + "useIndicator": True, +} + +DEFAULT_IDENTITY_PROFILE = { + "role": "Generalist", + "communication_style": "direct, concise, practical", + "emoji": ":gear:", +} + +IDENTITY_PROFILE_FIELDS = { + "role": "identity_role", + "communication_style": "identity_communication_style", + "emoji": "identity_emoji", +} + +EXTRA_IDENTITY_PROFILE_FIELDS = { + "autonomy_level": "identity_autonomy_level", + "verbosity": "identity_verbosity", + "output_format": "identity_output_format", + "update_cadence": "identity_update_cadence", + # Per-agent charter (optional). + # Used to give agents a "purpose in life" and a distinct vibe. 
+ "purpose": "identity_purpose", + "personality": "identity_personality", + "custom_instructions": "identity_custom_instructions", +} + +DEFAULT_GATEWAY_FILES = frozenset( + { + "AGENTS.md", + "SOUL.md", + "TASK_SOUL.md", + "SELF.md", + "AUTONOMY.md", + "TOOLS.md", + "IDENTITY.md", + "USER.md", + "HEARTBEAT.md", + "BOOT.md", + "BOOTSTRAP.md", + "MEMORY.md", + }, +) + +# These files are intended to evolve within the agent workspace. +# Provision them if missing, but avoid overwriting existing content during updates. +# +# Examples: +# - SELF.md: evolving identity/preferences +# - USER.md: human-provided context + lead intake notes +# - MEMORY.md: curated long-term memory (consolidated) +PRESERVE_AGENT_EDITABLE_FILES = frozenset({"SELF.md", "USER.md", "MEMORY.md", "TASK_SOUL.md"}) + +HEARTBEAT_LEAD_TEMPLATE = "HEARTBEAT_LEAD.md" +HEARTBEAT_AGENT_TEMPLATE = "HEARTBEAT_AGENT.md" +SESSION_KEY_PARTS_MIN = 2 +_SESSION_KEY_PARTS_MIN = SESSION_KEY_PARTS_MIN + +MAIN_TEMPLATE_MAP = { + "AGENTS.md": "MAIN_AGENTS.md", + "HEARTBEAT.md": "MAIN_HEARTBEAT.md", + "USER.md": "MAIN_USER.md", + "BOOT.md": "MAIN_BOOT.md", + "TOOLS.md": "MAIN_TOOLS.md", +} + +_TOOLS_KV_RE = re.compile(r"^(?P<key>[A-Z0-9_]+)=(?P<value>.*)$") +_NON_TRANSIENT_GATEWAY_ERROR_MARKERS = ("unsupported file",) +_TRANSIENT_GATEWAY_ERROR_MARKERS = ( + "connect call failed", + "connection refused", + "errno 111", + "econnrefused", + "did not receive a valid http response", + "no route to host", + "network is unreachable", + "host is down", + "name or service not known", + "received 1012", + "service restart", + "http 503", + "http 502", + "http 504", + "temporar", + "timeout", + "timed out", + "connection closed", + "connection reset", +) + +_COORDINATION_GATEWAY_TIMEOUT_S = 45.0 +_COORDINATION_GATEWAY_BASE_DELAY_S = 0.5 +_COORDINATION_GATEWAY_MAX_DELAY_S = 5.0 +_SECURE_RANDOM = random.SystemRandom() diff --git a/backend/app/services/openclaw/exceptions.py b/backend/app/services/openclaw/exceptions.py new file mode 100644 index 
00000000..b1dfe95e --- /dev/null +++ b/backend/app/services/openclaw/exceptions.py @@ -0,0 +1,90 @@ +"""OpenClaw-specific exception definitions and mapping helpers.""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum + +from fastapi import HTTPException, status + + +class GatewayOperation(str, Enum): + """Typed gateway operations used for consistent HTTP error mapping.""" + + NUDGE_AGENT = "nudge_agent" + SOUL_READ = "soul_read" + SOUL_WRITE = "soul_write" + ASK_USER_DISPATCH = "ask_user_dispatch" + LEAD_MESSAGE_DISPATCH = "lead_message_dispatch" + LEAD_BROADCAST_DISPATCH = "lead_broadcast_dispatch" + ONBOARDING_START_DISPATCH = "onboarding_start_dispatch" + ONBOARDING_ANSWER_DISPATCH = "onboarding_answer_dispatch" + + +@dataclass(frozen=True, slots=True) +class GatewayErrorPolicy: + """HTTP policy for mapping gateway operation failures.""" + + status_code: int + detail_template: str + + +_GATEWAY_ERROR_POLICIES: dict[GatewayOperation, GatewayErrorPolicy] = { + GatewayOperation.NUDGE_AGENT: GatewayErrorPolicy( + status_code=status.HTTP_502_BAD_GATEWAY, + detail_template="Gateway nudge failed: {error}", + ), + GatewayOperation.SOUL_READ: GatewayErrorPolicy( + status_code=status.HTTP_502_BAD_GATEWAY, + detail_template="Gateway SOUL read failed: {error}", + ), + GatewayOperation.SOUL_WRITE: GatewayErrorPolicy( + status_code=status.HTTP_502_BAD_GATEWAY, + detail_template="Gateway SOUL update failed: {error}", + ), + GatewayOperation.ASK_USER_DISPATCH: GatewayErrorPolicy( + status_code=status.HTTP_502_BAD_GATEWAY, + detail_template="Gateway ask-user dispatch failed: {error}", + ), + GatewayOperation.LEAD_MESSAGE_DISPATCH: GatewayErrorPolicy( + status_code=status.HTTP_502_BAD_GATEWAY, + detail_template="Gateway lead message dispatch failed: {error}", + ), + GatewayOperation.LEAD_BROADCAST_DISPATCH: GatewayErrorPolicy( + status_code=status.HTTP_502_BAD_GATEWAY, + detail_template="Gateway lead broadcast dispatch failed: 
{error}", + ), + GatewayOperation.ONBOARDING_START_DISPATCH: GatewayErrorPolicy( + status_code=status.HTTP_502_BAD_GATEWAY, + detail_template="Gateway onboarding start dispatch failed: {error}", + ), + GatewayOperation.ONBOARDING_ANSWER_DISPATCH: GatewayErrorPolicy( + status_code=status.HTTP_502_BAD_GATEWAY, + detail_template="Gateway onboarding answer dispatch failed: {error}", + ), +} + + +def map_gateway_error_to_http_exception( + operation: GatewayOperation, + exc: Exception, +) -> HTTPException: + """Map a gateway failure into a typed HTTP exception.""" + policy = _GATEWAY_ERROR_POLICIES[operation] + return HTTPException( + status_code=policy.status_code, + detail=policy.detail_template.format(error=str(exc)), + ) + + +def map_gateway_error_message( + operation: GatewayOperation, + exc: Exception, +) -> str: + """Map a gateway failure into a stable error message string.""" + if isinstance(exc, HTTPException): + detail = exc.detail + if isinstance(detail, str): + return detail + return str(detail) + return map_gateway_error_to_http_exception(operation, exc).detail diff --git a/backend/app/services/agent_provisioning.py b/backend/app/services/openclaw/provisioning.py similarity index 57% rename from backend/app/services/agent_provisioning.py rename to backend/app/services/openclaw/provisioning.py index 6abf4379..1a436c15 100644 --- a/backend/app/services/agent_provisioning.py +++ b/backend/app/services/openclaw/provisioning.py @@ -1,106 +1,65 @@ -"""Gateway-facing agent provisioning and cleanup helpers.""" +"""Provisioning, template sync, and board-lead lifecycle orchestration.""" from __future__ import annotations -from abc import ABC, abstractmethod +import asyncio import hashlib import json import re +from abc import ABC, abstractmethod +from collections.abc import Awaitable, Callable from contextlib import suppress from dataclasses import dataclass, field from pathlib import Path -from typing import TYPE_CHECKING, Any -from uuid import uuid4 +from typing 
import TYPE_CHECKING, Any, TypeVar +from uuid import UUID, uuid4 from jinja2 import Environment, FileSystemLoader, StrictUndefined, select_autoescape +from sqlalchemy import func +from sqlmodel import col, select +from app.core.agent_tokens import generate_agent_token, hash_agent_token, verify_agent_token from app.core.config import settings +from app.core.time import utcnow from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig -from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, openclaw_call -from app.services.gateway_agents import ( - gateway_agent_session_key, - gateway_openclaw_agent_id, +from app.integrations.openclaw_gateway import ( + OpenClawGatewayError, + ensure_session, + openclaw_call, + send_message, ) +from app.models.agents import Agent +from app.models.board_memory import BoardMemory +from app.models.boards import Board +from app.models.gateways import Gateway +from app.schemas.gateways import GatewayTemplatesSyncError, GatewayTemplatesSyncResult +from app.services.openclaw.constants import ( + _COORDINATION_GATEWAY_BASE_DELAY_S, + _COORDINATION_GATEWAY_MAX_DELAY_S, + _COORDINATION_GATEWAY_TIMEOUT_S, + _NON_TRANSIENT_GATEWAY_ERROR_MARKERS, + _SECURE_RANDOM, + _SESSION_KEY_PARTS_MIN, + _TOOLS_KV_RE, + _TRANSIENT_GATEWAY_ERROR_MARKERS, + DEFAULT_CHANNEL_HEARTBEAT_VISIBILITY, + DEFAULT_GATEWAY_FILES, + DEFAULT_HEARTBEAT_CONFIG, + DEFAULT_IDENTITY_PROFILE, + EXTRA_IDENTITY_PROFILE_FIELDS, + HEARTBEAT_AGENT_TEMPLATE, + HEARTBEAT_LEAD_TEMPLATE, + IDENTITY_PROFILE_FIELDS, + MAIN_TEMPLATE_MAP, + PRESERVE_AGENT_EDITABLE_FILES, +) +from app.services.openclaw.shared import GatewayAgentIdentity if TYPE_CHECKING: - from app.models.agents import Agent - from app.models.boards import Board - from app.models.gateways import Gateway + from sqlmodel.ext.asyncio.session import AsyncSession + from app.models.users import User -DEFAULT_HEARTBEAT_CONFIG: dict[str, Any] = { - "every": "10m", - "target": "none", 
- # Keep heartbeat delivery concise by default. - "includeReasoning": False, -} -DEFAULT_CHANNEL_HEARTBEAT_VISIBILITY: dict[str, bool] = { - # Suppress routine HEARTBEAT_OK delivery by default. - "showOk": False, - "showAlerts": True, - "useIndicator": True, -} -DEFAULT_IDENTITY_PROFILE = { - "role": "Generalist", - "communication_style": "direct, concise, practical", - "emoji": ":gear:", -} - -IDENTITY_PROFILE_FIELDS = { - "role": "identity_role", - "communication_style": "identity_communication_style", - "emoji": "identity_emoji", -} - -EXTRA_IDENTITY_PROFILE_FIELDS = { - "autonomy_level": "identity_autonomy_level", - "verbosity": "identity_verbosity", - "output_format": "identity_output_format", - "update_cadence": "identity_update_cadence", - # Per-agent charter (optional). - # Used to give agents a "purpose in life" and a distinct vibe. - "purpose": "identity_purpose", - "personality": "identity_personality", - "custom_instructions": "identity_custom_instructions", -} - -DEFAULT_GATEWAY_FILES = frozenset( - { - "AGENTS.md", - "SOUL.md", - "TASK_SOUL.md", - "SELF.md", - "AUTONOMY.md", - "TOOLS.md", - "IDENTITY.md", - "USER.md", - "HEARTBEAT.md", - "BOOT.md", - "BOOTSTRAP.md", - "MEMORY.md", - }, -) - -# These files are intended to evolve within the agent workspace. -# Provision them if missing, but avoid overwriting existing content during updates. 
-# -# Examples: -# - SELF.md: evolving identity/preferences -# - USER.md: human-provided context + lead intake notes -# - MEMORY.md: curated long-term memory (consolidated) -PRESERVE_AGENT_EDITABLE_FILES = frozenset({"SELF.md", "USER.md", "MEMORY.md", "TASK_SOUL.md"}) - -HEARTBEAT_LEAD_TEMPLATE = "HEARTBEAT_LEAD.md" -HEARTBEAT_AGENT_TEMPLATE = "HEARTBEAT_AGENT.md" -_SESSION_KEY_PARTS_MIN = 2 -MAIN_TEMPLATE_MAP = { - "AGENTS.md": "MAIN_AGENTS.md", - "HEARTBEAT.md": "MAIN_HEARTBEAT.md", - "USER.md": "MAIN_USER.md", - "BOOT.md": "MAIN_BOOT.md", - "TOOLS.md": "MAIN_TOOLS.md", -} - @dataclass(frozen=True, slots=True) class ProvisionOptions: @@ -305,7 +264,7 @@ def _build_context( workspace_path = _workspace_path(agent, workspace_root) session_key = agent.openclaw_session_id or "" base_url = settings.base_url or "REPLACE_WITH_BASE_URL" - main_session_key = gateway_agent_session_key(gateway) + main_session_key = GatewayAgentIdentity.session_key(gateway) identity_profile: dict[str, Any] = {} if isinstance(agent.identity_profile, dict): identity_profile = agent.identity_profile @@ -401,7 +360,7 @@ def _build_main_context( "session_key": agent.openclaw_session_id or "", "base_url": base_url, "auth_token": auth_token, - "main_session_key": gateway_agent_session_key(gateway), + "main_session_key": GatewayAgentIdentity.session_key(gateway), "workspace_root": gateway.workspace_root or "", "user_name": (user.name or "") if user else "", "user_preferred_name": preferred_name, @@ -876,7 +835,7 @@ class GatewayMainAgentLifecycleManager(BaseAgentLifecycleManager): """Provisioning manager for organization gateway-main agents.""" def _agent_id(self, agent: Agent) -> str: - return gateway_openclaw_agent_id(self._gateway) + return GatewayAgentIdentity.openclaw_agent_id(self._gateway) def _build_context( self, @@ -974,7 +933,7 @@ async def provision_main_agent( gateway = request.gateway if not gateway.url: return - session_key = (request.session_key or gateway_agent_session_key(gateway) 
or "").strip() + session_key = (request.session_key or GatewayAgentIdentity.session_key(gateway) or "").strip() if not session_key: msg = "gateway main agent session_key is required" raise ValueError(msg) @@ -1008,3 +967,683 @@ async def cleanup_agent( with suppress(OpenClawGatewayError): await control_plane.delete_agent_session(session_key) return None + + +_T = TypeVar("_T") + + +@dataclass(frozen=True) +class GatewayTemplateSyncOptions: + """Runtime options controlling gateway template synchronization.""" + + user: User | None + include_main: bool = True + reset_sessions: bool = False + rotate_tokens: bool = False + force_bootstrap: bool = False + board_id: UUID | None = None + + +@dataclass(frozen=True) +class _SyncContext: + """Shared state passed to sync helper functions.""" + + session: AsyncSession + gateway: Gateway + config: GatewayClientConfig + backoff: _GatewayBackoff + options: GatewayTemplateSyncOptions + + +def _is_transient_gateway_error(exc: Exception) -> bool: + if not isinstance(exc, OpenClawGatewayError): + return False + message = str(exc).lower() + if not message: + return False + if any(marker in message for marker in _NON_TRANSIENT_GATEWAY_ERROR_MARKERS): + return False + return ("503" in message and "websocket" in message) or any( + marker in message for marker in _TRANSIENT_GATEWAY_ERROR_MARKERS + ) + + +def _gateway_timeout_message( + exc: OpenClawGatewayError, + *, + timeout_s: float, + context: str, +) -> str: + rounded_timeout = int(timeout_s) + timeout_text = f"{rounded_timeout} seconds" + if rounded_timeout >= 120: + timeout_text = f"{rounded_timeout // 60} minutes" + return f"Gateway unreachable after {timeout_text} ({context} timeout). 
Last error: {exc}" + + +class _GatewayBackoff: + def __init__( + self, + *, + timeout_s: float = 10 * 60, + base_delay_s: float = 0.75, + max_delay_s: float = 30.0, + jitter: float = 0.2, + timeout_context: str = "gateway operation", + ) -> None: + self._timeout_s = timeout_s + self._base_delay_s = base_delay_s + self._max_delay_s = max_delay_s + self._jitter = jitter + self._timeout_context = timeout_context + self._delay_s = base_delay_s + + def reset(self) -> None: + self._delay_s = self._base_delay_s + + @staticmethod + async def _attempt( + fn: Callable[[], Awaitable[_T]], + ) -> tuple[_T | None, OpenClawGatewayError | None]: + try: + return await fn(), None + except OpenClawGatewayError as exc: + return None, exc + + async def run(self, fn: Callable[[], Awaitable[_T]]) -> _T: + # Use per-call deadlines so long-running syncs can still tolerate a later + # gateway restart without having an already-expired retry window. + deadline_s = asyncio.get_running_loop().time() + self._timeout_s + while True: + value, error = await self._attempt(fn) + if error is not None: + exc = error + if not _is_transient_gateway_error(exc): + raise exc + now = asyncio.get_running_loop().time() + remaining = deadline_s - now + if remaining <= 0: + raise TimeoutError( + _gateway_timeout_message( + exc, + timeout_s=self._timeout_s, + context=self._timeout_context, + ), + ) from exc + + sleep_s = min(self._delay_s, remaining) + if self._jitter: + sleep_s *= 1.0 + _SECURE_RANDOM.uniform( + -self._jitter, + self._jitter, + ) + sleep_s = max(0.0, min(sleep_s, remaining)) + await asyncio.sleep(sleep_s) + self._delay_s = min(self._delay_s * 2.0, self._max_delay_s) + continue + self.reset() + if value is None: + msg = "Gateway retry produced no value without an error" + raise RuntimeError(msg) + return value + + +async def _with_gateway_retry( + fn: Callable[[], Awaitable[_T]], + *, + backoff: _GatewayBackoff, +) -> _T: + return await backoff.run(fn) + + +async def 
_with_coordination_gateway_retry(fn: Callable[[], Awaitable[_T]]) -> _T: + return await _with_gateway_retry( + fn, + backoff=_GatewayBackoff( + timeout_s=_COORDINATION_GATEWAY_TIMEOUT_S, + base_delay_s=_COORDINATION_GATEWAY_BASE_DELAY_S, + max_delay_s=_COORDINATION_GATEWAY_MAX_DELAY_S, + jitter=0.15, + timeout_context="gateway coordination", + ), + ) + + +def _parse_tools_md(content: str) -> dict[str, str]: + values: dict[str, str] = {} + for raw in content.splitlines(): + line = raw.strip() + if not line or line.startswith("#"): + continue + match = _TOOLS_KV_RE.match(line) + if not match: + continue + values[match.group("key")] = match.group("value").strip() + return values + + +async def _get_agent_file( + *, + agent_gateway_id: str, + name: str, + config: GatewayClientConfig, + backoff: _GatewayBackoff | None = None, +) -> str | None: + try: + + async def _do_get() -> object: + return await openclaw_call( + "agents.files.get", + {"agentId": agent_gateway_id, "name": name}, + config=config, + ) + + payload = await (backoff.run(_do_get) if backoff else _do_get()) + except OpenClawGatewayError: + return None + if isinstance(payload, str): + return payload + if isinstance(payload, dict): + content = payload.get("content") + if isinstance(content, str): + return content + file_obj = payload.get("file") + if isinstance(file_obj, dict): + nested = file_obj.get("content") + if isinstance(nested, str): + return nested + return None + + +async def _get_existing_auth_token( + *, + agent_gateway_id: str, + config: GatewayClientConfig, + backoff: _GatewayBackoff | None = None, +) -> str | None: + tools = await _get_agent_file( + agent_gateway_id=agent_gateway_id, + name="TOOLS.md", + config=config, + backoff=backoff, + ) + if not tools: + return None + values = _parse_tools_md(tools) + token = values.get("AUTH_TOKEN") + if not token: + return None + token = token.strip() + return token or None + + +async def _paused_board_ids(session: AsyncSession, board_ids: list[UUID]) -> 
set[UUID]: + if not board_ids: + return set() + + commands = {"/pause", "/resume"} + statement = ( + select(BoardMemory.board_id, BoardMemory.content) + .where(col(BoardMemory.board_id).in_(board_ids)) + .where(col(BoardMemory.is_chat).is_(True)) + .where(func.lower(func.trim(col(BoardMemory.content))).in_(commands)) + .order_by(col(BoardMemory.board_id), col(BoardMemory.created_at).desc()) + # Postgres: DISTINCT ON (board_id) to get latest command per board. + .distinct(col(BoardMemory.board_id)) + ) + + paused: set[UUID] = set() + for board_id, content in await session.exec(statement): + cmd = (content or "").strip().lower() + if cmd == "/pause": + paused.add(board_id) + return paused + + +def _append_sync_error( + result: GatewayTemplatesSyncResult, + *, + message: str, + agent: Agent | None = None, + board: Board | None = None, +) -> None: + result.errors.append( + GatewayTemplatesSyncError( + agent_id=agent.id if agent else None, + agent_name=agent.name if agent else None, + board_id=board.id if board else None, + message=message, + ), + ) + + +async def _rotate_agent_token(session: AsyncSession, agent: Agent) -> str: + token = generate_agent_token() + agent.agent_token_hash = hash_agent_token(token) + agent.updated_at = utcnow() + session.add(agent) + await session.commit() + await session.refresh(agent) + return token + + +async def _ping_gateway(ctx: _SyncContext, result: GatewayTemplatesSyncResult) -> bool: + try: + + async def _do_ping() -> object: + return await openclaw_call("agents.list", config=ctx.config) + + await ctx.backoff.run(_do_ping) + except (TimeoutError, OpenClawGatewayError) as exc: + _append_sync_error(result, message=str(exc)) + return False + else: + return True + + +def _base_result( + gateway: Gateway, + *, + include_main: bool, + reset_sessions: bool, +) -> GatewayTemplatesSyncResult: + return GatewayTemplatesSyncResult( + gateway_id=gateway.id, + include_main=include_main, + reset_sessions=reset_sessions, + agents_updated=0, + 
agents_skipped=0, + main_updated=False, + ) + + +def _boards_by_id( + boards: list[Board], + *, + board_id: UUID | None, +) -> dict[UUID, Board] | None: + boards_by_id = {board.id: board for board in boards} + if board_id is None: + return boards_by_id + board = boards_by_id.get(board_id) + if board is None: + return None + return {board_id: board} + + +async def _resolve_agent_auth_token( + ctx: _SyncContext, + result: GatewayTemplatesSyncResult, + agent: Agent, + board: Board | None, + *, + agent_gateway_id: str, +) -> tuple[str | None, bool]: + try: + auth_token = await _get_existing_auth_token( + agent_gateway_id=agent_gateway_id, + config=ctx.config, + backoff=ctx.backoff, + ) + except TimeoutError as exc: + _append_sync_error(result, agent=agent, board=board, message=str(exc)) + return None, True + + if not auth_token: + if not ctx.options.rotate_tokens: + result.agents_skipped += 1 + _append_sync_error( + result, + agent=agent, + board=board, + message=( + "Skipping agent: unable to read AUTH_TOKEN from TOOLS.md " + "(run with rotate_tokens=true to re-key)." + ), + ) + return None, False + auth_token = await _rotate_agent_token(ctx.session, agent) + + if agent.agent_token_hash and not verify_agent_token( + auth_token, + agent.agent_token_hash, + ): + if ctx.options.rotate_tokens: + auth_token = await _rotate_agent_token(ctx.session, agent) + else: + _append_sync_error( + result, + agent=agent, + board=board, + message=( + "Warning: AUTH_TOKEN in TOOLS.md does not match backend " + "token hash (agent auth may be broken)." 
+ ), + ) + return auth_token, False + + +async def _sync_one_agent( + ctx: _SyncContext, + result: GatewayTemplatesSyncResult, + agent: Agent, + board: Board, +) -> bool: + auth_token, fatal = await _resolve_agent_auth_token( + ctx, + result, + agent, + board, + agent_gateway_id=_agent_key(agent), + ) + if fatal: + return True + if not auth_token: + return False + try: + + async def _do_provision() -> bool: + await provision_agent( + agent, + AgentProvisionRequest( + board=board, + gateway=ctx.gateway, + auth_token=auth_token, + user=ctx.options.user, + options=ProvisionOptions( + action="update", + force_bootstrap=ctx.options.force_bootstrap, + reset_session=ctx.options.reset_sessions, + ), + ), + ) + return True + + await _with_gateway_retry(_do_provision, backoff=ctx.backoff) + result.agents_updated += 1 + except TimeoutError as exc: # pragma: no cover - gateway/network dependent + result.agents_skipped += 1 + _append_sync_error(result, agent=agent, board=board, message=str(exc)) + return True + except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover + result.agents_skipped += 1 + _append_sync_error( + result, + agent=agent, + board=board, + message=f"Failed to sync templates: {exc}", + ) + return False + else: + return False + + +async def _sync_main_agent( + ctx: _SyncContext, + result: GatewayTemplatesSyncResult, +) -> bool: + main_session_key = GatewayAgentIdentity.session_key(ctx.gateway) + main_agent = ( + await Agent.objects.all() + .filter(col(Agent.gateway_id) == ctx.gateway.id) + .filter(col(Agent.board_id).is_(None)) + .first(ctx.session) + ) + if main_agent is None: + _append_sync_error( + result, + message="Gateway agent record not found; " "skipping gateway agent template sync.", + ) + return True + main_gateway_agent_id = GatewayAgentIdentity.openclaw_agent_id(ctx.gateway) + + token, fatal = await _resolve_agent_auth_token( + ctx, + result, + main_agent, + board=None, + agent_gateway_id=main_gateway_agent_id, + ) + if fatal: + return 
True + if not token: + _append_sync_error( + result, + agent=main_agent, + message="Skipping gateway agent: unable to read AUTH_TOKEN from TOOLS.md.", + ) + return True + stop_sync = False + try: + + async def _do_provision_main() -> bool: + await provision_main_agent( + main_agent, + MainAgentProvisionRequest( + gateway=ctx.gateway, + auth_token=token, + user=ctx.options.user, + session_key=main_session_key, + options=ProvisionOptions( + action="update", + force_bootstrap=ctx.options.force_bootstrap, + reset_session=ctx.options.reset_sessions, + ), + ), + ) + return True + + await _with_gateway_retry(_do_provision_main, backoff=ctx.backoff) + except TimeoutError as exc: # pragma: no cover - gateway/network dependent + _append_sync_error(result, agent=main_agent, message=str(exc)) + stop_sync = True + except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover + _append_sync_error( + result, + agent=main_agent, + message=f"Failed to sync gateway agent templates: {exc}", + ) + else: + result.main_updated = True + return stop_sync + + +async def sync_gateway_templates( + session: AsyncSession, + gateway: Gateway, + options: GatewayTemplateSyncOptions, +) -> GatewayTemplatesSyncResult: + """Synchronize AGENTS/TOOLS/etc templates to gateway-connected agents.""" + result = _base_result( + gateway, + include_main=options.include_main, + reset_sessions=options.reset_sessions, + ) + if not gateway.url: + _append_sync_error( + result, + message="Gateway URL is not configured for this gateway.", + ) + return result + + ctx = _SyncContext( + session=session, + gateway=gateway, + config=GatewayClientConfig(url=gateway.url, token=gateway.token), + backoff=_GatewayBackoff(timeout_s=10 * 60, timeout_context="template sync"), + options=options, + ) + if not await _ping_gateway(ctx, result): + return result + + boards = await Board.objects.filter_by(gateway_id=gateway.id).all(session) + boards_by_id = _boards_by_id(boards, board_id=options.board_id) + if boards_by_id is 
None: + _append_sync_error( + result, + message="Board does not belong to this gateway.", + ) + return result + paused_board_ids = await _paused_board_ids(session, list(boards_by_id.keys())) + if boards_by_id: + agents = await ( + Agent.objects.by_field_in("board_id", list(boards_by_id.keys())) + .order_by(col(Agent.created_at).asc()) + .all(session) + ) + else: + agents = [] + + stop_sync = False + for agent in agents: + board = boards_by_id.get(agent.board_id) if agent.board_id is not None else None + if board is None: + result.agents_skipped += 1 + _append_sync_error( + result, + agent=agent, + message="Skipping agent: board not found for agent.", + ) + continue + if board.id in paused_board_ids: + result.agents_skipped += 1 + continue + stop_sync = await _sync_one_agent(ctx, result, agent, board) + if stop_sync: + break + + if not stop_sync and options.include_main: + await _sync_main_agent(ctx, result) + return result + + +# Board lead lifecycle primitives consolidated from app.services.board_leads. 
+def lead_session_key(board: Board) -> str: + """Return the deterministic main session key for a board lead agent.""" + return f"agent:lead-{board.id}:main" + + +def lead_agent_name(_: Board) -> str: + """Return the default display name for board lead agents.""" + return "Lead Agent" + + +@dataclass(frozen=True, slots=True) +class LeadAgentOptions: + """Optional overrides for board-lead provisioning behavior.""" + + agent_name: str | None = None + identity_profile: dict[str, str] | None = None + action: str = "provision" + + +@dataclass(frozen=True, slots=True) +class LeadAgentRequest: + """Inputs required to ensure or provision a board lead agent.""" + + board: Board + gateway: Gateway + config: GatewayClientConfig + user: User | None + options: LeadAgentOptions = field(default_factory=LeadAgentOptions) + + +async def ensure_board_lead_agent( + session: AsyncSession, + *, + request: LeadAgentRequest, +) -> tuple[Agent, bool]: + """Ensure a board has a lead agent; return `(agent, created)`.""" + board = request.board + config_options = request.options + existing = ( + await session.exec( + select(Agent) + .where(Agent.board_id == board.id) + .where(col(Agent.is_board_lead).is_(True)), + ) + ).first() + if existing: + desired_name = config_options.agent_name or lead_agent_name(board) + changed = False + if existing.name != desired_name: + existing.name = desired_name + changed = True + if existing.gateway_id != request.gateway.id: + existing.gateway_id = request.gateway.id + changed = True + desired_session_key = lead_session_key(board) + if not existing.openclaw_session_id: + existing.openclaw_session_id = desired_session_key + changed = True + if changed: + existing.updated_at = utcnow() + session.add(existing) + await session.commit() + await session.refresh(existing) + return existing, False + + merged_identity_profile: dict[str, Any] = { + "role": "Board Lead", + "communication_style": "direct, concise, practical", + "emoji": ":gear:", + } + if 
config_options.identity_profile: + merged_identity_profile.update( + { + key: value.strip() + for key, value in config_options.identity_profile.items() + if value.strip() + }, + ) + + agent = Agent( + name=config_options.agent_name or lead_agent_name(board), + status="provisioning", + board_id=board.id, + gateway_id=request.gateway.id, + is_board_lead=True, + heartbeat_config=DEFAULT_HEARTBEAT_CONFIG.copy(), + identity_profile=merged_identity_profile, + openclaw_session_id=lead_session_key(board), + provision_requested_at=utcnow(), + provision_action=config_options.action, + ) + raw_token = generate_agent_token() + agent.agent_token_hash = hash_agent_token(raw_token) + session.add(agent) + await session.commit() + await session.refresh(agent) + + try: + await provision_agent( + agent, + AgentProvisionRequest( + board=board, + gateway=request.gateway, + auth_token=raw_token, + user=request.user, + options=ProvisionOptions(action=config_options.action), + ), + ) + if agent.openclaw_session_id: + await ensure_session( + agent.openclaw_session_id, + config=request.config, + label=agent.name, + ) + await send_message( + ( + f"Hello {agent.name}. Your workspace has been provisioned.\n\n" + "Start the agent, run BOOT.md, and if BOOTSTRAP.md exists run " + "it once then delete it. Begin heartbeats after startup." + ), + session_key=agent.openclaw_session_id, + config=request.config, + deliver=True, + ) + except OpenClawGatewayError: + # Best-effort provisioning. The board/agent rows should still exist. 
+ pass + + return agent, True diff --git a/backend/app/services/openclaw/services.py b/backend/app/services/openclaw/services.py new file mode 100644 index 00000000..a74c21bd --- /dev/null +++ b/backend/app/services/openclaw/services.py @@ -0,0 +1,2949 @@ +"""High-level OpenClaw session, admin, agent, and coordination services.""" + +from __future__ import annotations + +import asyncio +import json +import logging +import re +from abc import ABC, abstractmethod +from collections.abc import Awaitable, Callable, Iterable +from dataclasses import dataclass +from datetime import UTC, datetime +from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar +from uuid import UUID, uuid4 + +from fastapi import HTTPException, Request, status +from sqlalchemy import asc, or_ +from sqlmodel import col, select +from sse_starlette.sse import EventSourceResponse + +from app.core.agent_tokens import generate_agent_token, hash_agent_token +from app.core.auth import AuthContext +from app.core.config import settings +from app.core.time import utcnow +from app.db import crud +from app.db.pagination import paginate +from app.db.session import async_session_maker +from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig +from app.integrations.openclaw_gateway import ( + OpenClawGatewayError, + ensure_session, + get_chat_history, + openclaw_call, + send_message, +) +from app.models.activity_events import ActivityEvent +from app.models.agents import Agent +from app.models.approvals import Approval +from app.models.board_onboarding import BoardOnboardingSession +from app.models.boards import Board +from app.models.gateways import Gateway +from app.models.organizations import Organization +from app.models.tasks import Task +from app.schemas.agents import ( + AgentCreate, + AgentHeartbeat, + AgentHeartbeatCreate, + AgentRead, + AgentUpdate, +) +from app.schemas.common import OkResponse +from app.schemas.gateway_api import ( + GatewayResolveQuery, + 
GatewaySessionHistoryResponse, + GatewaySessionMessageRequest, + GatewaySessionResponse, + GatewaySessionsResponse, + GatewaysStatusResponse, +) +from app.schemas.gateway_coordination import ( + GatewayLeadBroadcastBoardResult, + GatewayLeadBroadcastRequest, + GatewayLeadBroadcastResponse, + GatewayLeadMessageRequest, + GatewayLeadMessageResponse, + GatewayMainAskUserRequest, + GatewayMainAskUserResponse, +) +from app.schemas.gateways import GatewayTemplatesSyncResult +from app.services.activity_log import record_activity +from app.services.openclaw.constants import ( + AGENT_SESSION_PREFIX, + DEFAULT_HEARTBEAT_CONFIG, + OFFLINE_AFTER, +) +from app.services.openclaw.exceptions import ( + GatewayOperation, + map_gateway_error_message, + map_gateway_error_to_http_exception, +) +from app.services.openclaw.provisioning import ( + AgentProvisionRequest, + GatewayTemplateSyncOptions, + LeadAgentOptions, + LeadAgentRequest, + MainAgentProvisionRequest, + ProvisionOptions, + _agent_key, + _with_coordination_gateway_retry, + cleanup_agent, + ensure_board_lead_agent, + provision_agent, + provision_main_agent, + sync_gateway_templates, +) +from app.services.openclaw.shared import ( + GatewayAgentIdentity, + require_gateway_config_for_board, + resolve_trace_id, + send_gateway_agent_message, +) +from app.services.organizations import ( + OrganizationContext, + get_active_membership, + has_board_access, + is_org_admin, + list_accessible_board_ids, + require_board_access, +) + +if TYPE_CHECKING: + from collections.abc import AsyncIterator, Sequence + + from fastapi_pagination.limit_offset import LimitOffsetPage + from sqlalchemy.sql.elements import ColumnElement + from sqlmodel.ext.asyncio.session import AsyncSession + from sqlmodel.sql.expression import SelectOfScalar + + from app.models.users import User + + +_T = TypeVar("_T") + + +@dataclass(frozen=True, slots=True) +class GatewayTemplateSyncQuery: + """Sync options parsed from query args for gateway template operations.""" + 
+ include_main: bool + reset_sessions: bool + rotate_tokens: bool + force_bootstrap: bool + board_id: UUID | None + + +class GatewaySessionService: + """Read/query gateway runtime session state for user-facing APIs.""" + + def __init__(self, session: AsyncSession) -> None: + self._session = session + self._logger = logging.getLogger(__name__) + + @property + def session(self) -> AsyncSession: + return self._session + + @session.setter + def session(self, value: AsyncSession) -> None: + self._session = value + + @property + def logger(self) -> logging.Logger: + return self._logger + + @logger.setter + def logger(self, value: logging.Logger) -> None: + self._logger = value + + @staticmethod + def to_resolve_query( + board_id: str | None, + gateway_url: str | None, + gateway_token: str | None, + ) -> GatewayResolveQuery: + return GatewayResolveQuery( + board_id=board_id, + gateway_url=gateway_url, + gateway_token=gateway_token, + ) + + @staticmethod + def as_object_list(value: object) -> list[object]: + if value is None: + return [] + if isinstance(value, list): + return value + if isinstance(value, (tuple, set)): + return list(value) + if isinstance(value, (str, bytes, dict)): + return [] + if isinstance(value, Iterable): + return list(value) + return [] + + async def resolve_gateway( + self, + params: GatewayResolveQuery, + *, + user: User | None = None, + ) -> tuple[Board | None, GatewayClientConfig, str | None]: + self.logger.log( + 5, + "gateway.resolve.start board_id=%s gateway_url=%s", + params.board_id, + params.gateway_url, + ) + if params.gateway_url: + return ( + None, + GatewayClientConfig(url=params.gateway_url, token=params.gateway_token), + None, + ) + if not params.board_id: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="board_id or gateway_url is required", + ) + board = await Board.objects.by_id(params.board_id).first(self.session) + if board is None: + raise HTTPException( + 
status_code=status.HTTP_404_NOT_FOUND, + detail="Board not found", + ) + if user is not None: + await require_board_access(self.session, user=user, board=board, write=False) + if not board.gateway_id: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Board gateway_id is required", + ) + gateway = await Gateway.objects.by_id(board.gateway_id).first(self.session) + if gateway is None: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Board gateway_id is invalid", + ) + if not gateway.url: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Gateway url is required", + ) + main_agent = ( + await Agent.objects.filter_by(gateway_id=gateway.id) + .filter(col(Agent.board_id).is_(None)) + .first(self.session) + ) + main_session = main_agent.openclaw_session_id if main_agent else None + return ( + board, + GatewayClientConfig(url=gateway.url, token=gateway.token), + main_session, + ) + + async def require_gateway( + self, + board_id: str | None, + *, + user: User | None = None, + ) -> tuple[Board, GatewayClientConfig, str | None]: + params = GatewayResolveQuery(board_id=board_id) + board, config, main_session = await self.resolve_gateway(params, user=user) + if board is None: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="board_id is required", + ) + return board, config, main_session + + async def list_sessions(self, config: GatewayClientConfig) -> list[dict[str, object]]: + sessions = await openclaw_call("sessions.list", config=config) + if isinstance(sessions, dict): + raw_items = self.as_object_list(sessions.get("sessions")) + else: + raw_items = self.as_object_list(sessions) + return [item for item in raw_items if isinstance(item, dict)] + + async def with_main_session( + self, + sessions_list: list[dict[str, object]], + *, + config: GatewayClientConfig, + main_session: str | None, + ) -> list[dict[str, object]]: + if not 
main_session or any(item.get("key") == main_session for item in sessions_list): + return sessions_list + try: + await ensure_session(main_session, config=config, label="Gateway Agent") + return await self.list_sessions(config) + except OpenClawGatewayError: + return sessions_list + + @staticmethod + def _require_same_org(board: Board | None, organization_id: UUID) -> None: + if board is not None and board.organization_id != organization_id: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + + async def get_status( + self, + *, + params: GatewayResolveQuery, + organization_id: UUID, + user: User | None, + ) -> GatewaysStatusResponse: + board, config, main_session = await self.resolve_gateway(params, user=user) + self._require_same_org(board, organization_id) + try: + sessions = await openclaw_call("sessions.list", config=config) + if isinstance(sessions, dict): + sessions_list = self.as_object_list(sessions.get("sessions")) + else: + sessions_list = self.as_object_list(sessions) + main_session_entry: object | None = None + main_session_error: str | None = None + if main_session: + try: + ensured = await ensure_session( + main_session, + config=config, + label="Gateway Agent", + ) + if isinstance(ensured, dict): + main_session_entry = ensured.get("entry") or ensured + except OpenClawGatewayError as exc: + main_session_error = str(exc) + return GatewaysStatusResponse( + connected=True, + gateway_url=config.url, + sessions_count=len(sessions_list), + sessions=sessions_list, + main_session=main_session_entry, + main_session_error=main_session_error, + ) + except OpenClawGatewayError as exc: + return GatewaysStatusResponse( + connected=False, + gateway_url=config.url, + error=str(exc), + ) + + async def get_sessions( + self, + *, + board_id: str | None, + organization_id: UUID, + user: User | None, + ) -> GatewaySessionsResponse: + params = GatewayResolveQuery(board_id=board_id) + board, config, main_session = await self.resolve_gateway(params, user=user) + 
self._require_same_org(board, organization_id) + try: + sessions = await openclaw_call("sessions.list", config=config) + except OpenClawGatewayError as exc: + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail=str(exc), + ) from exc + if isinstance(sessions, dict): + sessions_list = self.as_object_list(sessions.get("sessions")) + else: + sessions_list = self.as_object_list(sessions) + + main_session_entry: object | None = None + if main_session: + try: + ensured = await ensure_session( + main_session, + config=config, + label="Gateway Agent", + ) + if isinstance(ensured, dict): + main_session_entry = ensured.get("entry") or ensured + except OpenClawGatewayError: + main_session_entry = None + return GatewaySessionsResponse(sessions=sessions_list, main_session=main_session_entry) + + async def get_session( + self, + *, + session_id: str, + board_id: str | None, + organization_id: UUID, + user: User | None, + ) -> GatewaySessionResponse: + params = GatewayResolveQuery(board_id=board_id) + board, config, main_session = await self.resolve_gateway(params, user=user) + self._require_same_org(board, organization_id) + try: + sessions_list = await self.list_sessions(config) + except OpenClawGatewayError as exc: + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail=str(exc), + ) from exc + sessions_list = await self.with_main_session( + sessions_list, + config=config, + main_session=main_session, + ) + session_entry = next( + (item for item in sessions_list if item.get("key") == session_id), None + ) + if session_entry is None and main_session and session_id == main_session: + try: + ensured = await ensure_session( + main_session, + config=config, + label="Gateway Agent", + ) + if isinstance(ensured, dict): + session_entry = ensured.get("entry") or ensured + except OpenClawGatewayError: + session_entry = None + if session_entry is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Session not found", + ) 
+ return GatewaySessionResponse(session=session_entry) + + async def get_session_history( + self, + *, + session_id: str, + board_id: str | None, + organization_id: UUID, + user: User | None, + ) -> GatewaySessionHistoryResponse: + board, config, _ = await self.require_gateway(board_id, user=user) + self._require_same_org(board, organization_id) + try: + history = await get_chat_history(session_id, config=config) + except OpenClawGatewayError as exc: + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail=str(exc), + ) from exc + if isinstance(history, dict) and isinstance(history.get("messages"), list): + return GatewaySessionHistoryResponse(history=history["messages"]) + return GatewaySessionHistoryResponse(history=self.as_object_list(history)) + + async def send_session_message( + self, + *, + session_id: str, + payload: GatewaySessionMessageRequest, + board_id: str | None, + user: User | None, + ) -> None: + board, config, main_session = await self.require_gateway(board_id, user=user) + if user is None: + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED) + await require_board_access(self.session, user=user, board=board, write=True) + try: + if main_session and session_id == main_session: + await ensure_session(main_session, config=config, label="Gateway Agent") + await send_message(payload.content, session_key=session_id, config=config) + except OpenClawGatewayError as exc: + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail=str(exc), + ) from exc + + +class AbstractGatewayMainAgentManager(ABC): + """Abstract manager for gateway-main agent naming/profile behavior.""" + + @abstractmethod + def build_main_agent_name(self, gateway: Gateway) -> str: + raise NotImplementedError + + @abstractmethod + def build_identity_profile(self) -> dict[str, str]: + raise NotImplementedError + + +class DefaultGatewayMainAgentManager(AbstractGatewayMainAgentManager): + """Default naming/profile strategy for gateway-main 
agents.""" + + def build_main_agent_name(self, gateway: Gateway) -> str: + return f"{gateway.name} Gateway Agent" + + def build_identity_profile(self) -> dict[str, str]: + return { + "role": "Gateway Agent", + "communication_style": "direct, concise, practical", + "emoji": ":compass:", + } + + +class GatewayAdminLifecycleService: + """Write-side gateway lifecycle service (CRUD, main agent, template sync).""" + + def __init__( + self, + session: AsyncSession, + *, + main_agent_manager: AbstractGatewayMainAgentManager | None = None, + ) -> None: + self._session = session + self._logger = logging.getLogger(__name__) + self._main_agent_manager = main_agent_manager or DefaultGatewayMainAgentManager() + + @property + def session(self) -> AsyncSession: + return self._session + + @session.setter + def session(self, value: AsyncSession) -> None: + self._session = value + + @property + def logger(self) -> logging.Logger: + return self._logger + + @logger.setter + def logger(self, value: logging.Logger) -> None: + self._logger = value + + @property + def main_agent_manager(self) -> AbstractGatewayMainAgentManager: + return self._main_agent_manager + + @main_agent_manager.setter + def main_agent_manager(self, value: AbstractGatewayMainAgentManager) -> None: + self._main_agent_manager = value + + async def require_gateway( + self, + *, + gateway_id: UUID, + organization_id: UUID, + ) -> Gateway: + gateway = ( + await Gateway.objects.by_id(gateway_id) + .filter(col(Gateway.organization_id) == organization_id) + .first(self.session) + ) + if gateway is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Gateway not found", + ) + return gateway + + async def find_main_agent(self, gateway: Gateway) -> Agent | None: + return ( + await Agent.objects.filter_by(gateway_id=gateway.id) + .filter(col(Agent.board_id).is_(None)) + .first(self.session) + ) + + @staticmethod + def extract_agent_id_from_entry(item: object) -> str | None: + if isinstance(item, str): 
+ value = item.strip() + return value or None + if not isinstance(item, dict): + return None + for key in ("id", "agentId", "agent_id"): + raw = item.get(key) + if isinstance(raw, str) and raw.strip(): + return raw.strip() + return None + + @staticmethod + def extract_agents_list(payload: object) -> list[object]: + if isinstance(payload, list): + return [item for item in payload] + if not isinstance(payload, dict): + return [] + agents = payload.get("agents") or [] + if not isinstance(agents, list): + return [] + return [item for item in agents] + + async def upsert_main_agent_record(self, gateway: Gateway) -> tuple[Agent, bool]: + changed = False + session_key = GatewayAgentIdentity.session_key(gateway) + agent = await self.find_main_agent(gateway) + main_agent_name = self.main_agent_manager.build_main_agent_name(gateway) + identity_profile = self.main_agent_manager.build_identity_profile() + if agent is None: + agent = Agent( + name=main_agent_name, + status="provisioning", + board_id=None, + gateway_id=gateway.id, + is_board_lead=False, + openclaw_session_id=session_key, + heartbeat_config=DEFAULT_HEARTBEAT_CONFIG.copy(), + identity_profile=identity_profile, + ) + self.session.add(agent) + changed = True + if agent.board_id is not None: + agent.board_id = None + changed = True + if agent.gateway_id != gateway.id: + agent.gateway_id = gateway.id + changed = True + if agent.is_board_lead: + agent.is_board_lead = False + changed = True + if agent.name != main_agent_name: + agent.name = main_agent_name + changed = True + if agent.openclaw_session_id != session_key: + agent.openclaw_session_id = session_key + changed = True + if agent.heartbeat_config is None: + agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy() + changed = True + if agent.identity_profile is None: + agent.identity_profile = identity_profile + changed = True + if not agent.status: + agent.status = "provisioning" + changed = True + if changed: + agent.updated_at = utcnow() + 
self.session.add(agent) + return agent, changed + + async def gateway_has_main_agent_entry(self, gateway: Gateway) -> bool: + if not gateway.url: + return False + config = GatewayClientConfig(url=gateway.url, token=gateway.token) + target_id = GatewayAgentIdentity.openclaw_agent_id(gateway) + try: + payload = await openclaw_call("agents.list", config=config) + except OpenClawGatewayError: + return True + for item in self.extract_agents_list(payload): + if self.extract_agent_id_from_entry(item) == target_id: + return True + return False + + async def provision_main_agent_record( + self, + gateway: Gateway, + agent: Agent, + *, + user: User | None, + action: str, + notify: bool, + ) -> Agent: + session_key = GatewayAgentIdentity.session_key(gateway) + raw_token = generate_agent_token() + agent.agent_token_hash = hash_agent_token(raw_token) + agent.provision_requested_at = utcnow() + agent.provision_action = action + agent.updated_at = utcnow() + if agent.heartbeat_config is None: + agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy() + self.session.add(agent) + await self.session.commit() + await self.session.refresh(agent) + if not gateway.url: + return agent + try: + await provision_main_agent( + agent, + MainAgentProvisionRequest( + gateway=gateway, + auth_token=raw_token, + user=user, + session_key=session_key, + options=ProvisionOptions(action=action), + ), + ) + await ensure_session( + session_key, + config=GatewayClientConfig(url=gateway.url, token=gateway.token), + label=agent.name, + ) + if notify: + await send_message( + ( + f"Hello {agent.name}. Your gateway provisioning was updated.\n\n" + "Please re-read AGENTS.md, USER.md, HEARTBEAT.md, and TOOLS.md. " + "If BOOTSTRAP.md exists, run it once then delete it. " + "Begin heartbeats after startup." 
+ ), + session_key=session_key, + config=GatewayClientConfig(url=gateway.url, token=gateway.token), + deliver=True, + ) + self.logger.info( + "gateway.main_agent.provision_success gateway_id=%s agent_id=%s action=%s", + gateway.id, + agent.id, + action, + ) + except OpenClawGatewayError as exc: + self.logger.warning( + "gateway.main_agent.provision_failed_gateway gateway_id=%s agent_id=%s error=%s", + gateway.id, + agent.id, + str(exc), + ) + except (OSError, RuntimeError, ValueError) as exc: + self.logger.error( + "gateway.main_agent.provision_failed gateway_id=%s agent_id=%s error=%s", + gateway.id, + agent.id, + str(exc), + ) + except Exception as exc: # pragma: no cover - defensive fallback + self.logger.critical( + "gateway.main_agent.provision_failed_unexpected gateway_id=%s agent_id=%s " + "error_type=%s error=%s", + gateway.id, + agent.id, + exc.__class__.__name__, + str(exc), + ) + return agent + + async def ensure_main_agent( + self, + gateway: Gateway, + auth: AuthContext, + *, + action: str = "provision", + ) -> Agent: + self.logger.log( + 5, + "gateway.main_agent.ensure.start gateway_id=%s action=%s", + gateway.id, + action, + ) + agent, _ = await self.upsert_main_agent_record(gateway) + return await self.provision_main_agent_record( + gateway, + agent, + user=auth.user, + action=action, + notify=True, + ) + + async def ensure_gateway_agents_exist(self, gateways: list[Gateway]) -> None: + for gateway in gateways: + agent, gateway_changed = await self.upsert_main_agent_record(gateway) + has_gateway_entry = await self.gateway_has_main_agent_entry(gateway) + needs_provision = ( + gateway_changed or not bool(agent.agent_token_hash) or not has_gateway_entry + ) + if needs_provision: + await self.provision_main_agent_record( + gateway, + agent, + user=None, + action="provision", + notify=False, + ) + + async def clear_agent_foreign_keys(self, *, agent_id: UUID) -> None: + now = utcnow() + await crud.update_where( + self.session, + Task, + 
col(Task.assigned_agent_id) == agent_id, + col(Task.status) == "in_progress", + assigned_agent_id=None, + status="inbox", + in_progress_at=None, + updated_at=now, + commit=False, + ) + await crud.update_where( + self.session, + Task, + col(Task.assigned_agent_id) == agent_id, + col(Task.status) != "in_progress", + assigned_agent_id=None, + updated_at=now, + commit=False, + ) + await crud.update_where( + self.session, + ActivityEvent, + col(ActivityEvent.agent_id) == agent_id, + agent_id=None, + commit=False, + ) + await crud.update_where( + self.session, + Approval, + col(Approval.agent_id) == agent_id, + agent_id=None, + commit=False, + ) + + async def sync_templates( + self, + gateway: Gateway, + *, + query: GatewayTemplateSyncQuery, + auth: AuthContext, + ) -> GatewayTemplatesSyncResult: + self.logger.log( + 5, + "gateway.templates.sync.start gateway_id=%s include_main=%s", + gateway.id, + query.include_main, + ) + await self.ensure_gateway_agents_exist([gateway]) + result = await sync_gateway_templates( + self.session, + gateway, + GatewayTemplateSyncOptions( + user=auth.user, + include_main=query.include_main, + reset_sessions=query.reset_sessions, + rotate_tokens=query.rotate_tokens, + force_bootstrap=query.force_bootstrap, + board_id=query.board_id, + ), + ) + self.logger.info("gateway.templates.sync.success gateway_id=%s", gateway.id) + return result + + +class ActorContextLike(Protocol): + """Minimal actor context contract consumed by lifecycle APIs.""" + + actor_type: Literal["user", "agent"] + user: User | None + agent: Agent | None + + +@dataclass(frozen=True, slots=True) +class AgentUpdateOptions: + """Runtime options for update-and-reprovision flows.""" + + force: bool + user: User | None + context: OrganizationContext + + +@dataclass(frozen=True, slots=True) +class AgentUpdateProvisionTarget: + """Resolved target for an update provision operation.""" + + is_main_agent: bool + board: Board | None + gateway: Gateway + client_config: GatewayClientConfig 
+ + +@dataclass(frozen=True, slots=True) +class AgentUpdateProvisionRequest: + """Provision request payload for agent updates.""" + + target: AgentUpdateProvisionTarget + raw_token: str + user: User | None + force_bootstrap: bool + + +class AbstractProvisionExecution(ABC): + """Shared async execution contract for board/main agent provisioning actions.""" + + def __init__( + self, + *, + service: AgentLifecycleService, + agent: Agent, + provision_request: AgentUpdateProvisionRequest, + action: str, + wakeup_verb: str, + raise_gateway_errors: bool, + ) -> None: + self._service = service + self._agent = agent + self._request = provision_request + self._action = action + self._wakeup_verb = wakeup_verb + self._raise_gateway_errors = raise_gateway_errors + + @property + def agent(self) -> Agent: + return self._agent + + @agent.setter + def agent(self, value: Agent) -> None: + if not isinstance(value, Agent): + msg = "agent must be an Agent model" + raise TypeError(msg) + self._agent = value + + @property + def request(self) -> AgentUpdateProvisionRequest: + return self._request + + @request.setter + def request(self, value: AgentUpdateProvisionRequest) -> None: + if not isinstance(value, AgentUpdateProvisionRequest): + msg = "request must be an AgentUpdateProvisionRequest" + raise TypeError(msg) + self._request = value + + @property + def logger(self) -> logging.Logger: + return self._service.logger + + @abstractmethod + async def _provision(self) -> None: + raise NotImplementedError + + async def execute(self) -> None: + self.logger.log( + 5, + "agent.provision.start action=%s agent_id=%s target_main=%s", + self._action, + self.agent.id, + self.request.target.is_main_agent, + ) + try: + await self._provision() + await self._service.send_wakeup_message( + self.agent, + self.request.target.client_config, + verb=self._wakeup_verb, + ) + self.agent.provision_confirm_token_hash = None + self.agent.provision_requested_at = None + self.agent.provision_action = None + 
self.agent.status = "online" + self.agent.updated_at = utcnow() + self._service.session.add(self.agent) + await self._service.session.commit() + record_activity( + self._service.session, + event_type=f"agent.{self._action}.direct", + message=f"{self._action.capitalize()}d directly for {self.agent.name}.", + agent_id=self.agent.id, + ) + record_activity( + self._service.session, + event_type="agent.wakeup.sent", + message=f"Wakeup message sent to {self.agent.name}.", + agent_id=self.agent.id, + ) + await self._service.session.commit() + self.logger.info( + "agent.provision.success action=%s agent_id=%s", + self._action, + self.agent.id, + ) + except OpenClawGatewayError as exc: + self._service.record_instruction_failure( + self._service.session, + self.agent, + str(exc), + self._action, + ) + await self._service.session.commit() + self.logger.error( + "agent.provision.gateway_error action=%s agent_id=%s error=%s", + self._action, + self.agent.id, + str(exc), + ) + if self._raise_gateway_errors: + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail=f"Gateway {self._action} failed: {exc}", + ) from exc + except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover + self._service.record_instruction_failure( + self._service.session, + self.agent, + str(exc), + self._action, + ) + await self._service.session.commit() + self.logger.critical( + "agent.provision.runtime_error action=%s agent_id=%s error=%s", + self._action, + self.agent.id, + str(exc), + ) + if self._raise_gateway_errors: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Unexpected error {self._action}ing agent provisioning.", + ) from exc + + +class BoardAgentProvisionExecution(AbstractProvisionExecution): + """Provision execution for board-scoped agents.""" + + async def _provision(self) -> None: + board = self.request.target.board + if board is None: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + 
detail="board is required for non-main agent provisioning", + ) + await provision_agent( + self.agent, + AgentProvisionRequest( + board=board, + gateway=self.request.target.gateway, + auth_token=self.request.raw_token, + user=self.request.user, + options=ProvisionOptions( + action=self._action, + force_bootstrap=self.request.force_bootstrap, + reset_session=True, + ), + ), + ) + + +class MainAgentProvisionExecution(AbstractProvisionExecution): + """Provision execution for gateway-main agents.""" + + async def _provision(self) -> None: + await provision_main_agent( + self.agent, + MainAgentProvisionRequest( + gateway=self.request.target.gateway, + auth_token=self.request.raw_token, + user=self.request.user, + session_key=self.agent.openclaw_session_id, + options=ProvisionOptions( + action=self._action, + force_bootstrap=self.request.force_bootstrap, + reset_session=True, + ), + ), + ) + + +class AgentLifecycleService: + """Async service encapsulating agent lifecycle behavior for API routes.""" + + def __init__(self, session: AsyncSession) -> None: + self._session = session + self._logger = logging.getLogger(__name__) + + @property + def session(self) -> AsyncSession: + return self._session + + @session.setter + def session(self, value: AsyncSession) -> None: + self._session = value + + @property + def logger(self) -> logging.Logger: + return self._logger + + @logger.setter + def logger(self, value: logging.Logger) -> None: + self._logger = value + + @staticmethod + def parse_since(value: str | None) -> datetime | None: + if not value: + return None + normalized = value.strip() + if not normalized: + return None + normalized = normalized.replace("Z", "+00:00") + try: + parsed = datetime.fromisoformat(normalized) + except ValueError: + return None + if parsed.tzinfo is not None: + return parsed.astimezone(UTC).replace(tzinfo=None) + return parsed + + @staticmethod + def slugify(value: str) -> str: + slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") + return 
slug or uuid4().hex + + @classmethod + def build_session_key(cls, agent_name: str) -> str: + return f"{AGENT_SESSION_PREFIX}:{cls.slugify(agent_name)}:main" + + @classmethod + def workspace_path(cls, agent_name: str, workspace_root: str | None) -> str: + if not workspace_root: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Gateway workspace_root is required", + ) + root = workspace_root.rstrip("/") + return f"{root}/workspace-{cls.slugify(agent_name)}" + + async def require_board( + self, + board_id: UUID | str | None, + *, + user: User | None = None, + write: bool = False, + ) -> Board: + if not board_id: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="board_id is required", + ) + board = await Board.objects.by_id(board_id).first(self.session) + if board is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Board not found", + ) + if user is not None: + await require_board_access(self.session, user=user, board=board, write=write) + return board + + async def require_gateway( + self, + board: Board, + ) -> tuple[Gateway, GatewayClientConfig]: + if not board.gateway_id: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Board gateway_id is required", + ) + gateway = await Gateway.objects.by_id(board.gateway_id).first(self.session) + if gateway is None: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Board gateway_id is invalid", + ) + if gateway.organization_id != board.organization_id: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Board gateway_id is invalid", + ) + if not gateway.url: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Gateway url is required", + ) + if not gateway.workspace_root: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Gateway workspace_root is required", + ) 
+ return gateway, GatewayClientConfig(url=gateway.url, token=gateway.token) + + @staticmethod + def gateway_client_config(gateway: Gateway) -> GatewayClientConfig: + if not gateway.url: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Gateway url is required", + ) + return GatewayClientConfig(url=gateway.url, token=gateway.token) + + @staticmethod + def is_gateway_main(agent: Agent) -> bool: + return agent.board_id is None + + @classmethod + def to_agent_read(cls, agent: Agent) -> AgentRead: + model = AgentRead.model_validate(agent, from_attributes=True) + return model.model_copy( + update={"is_gateway_main": cls.is_gateway_main(agent)}, + ) + + @staticmethod + def coerce_agent_items(items: Sequence[Any]) -> list[Agent]: + agents: list[Agent] = [] + for item in items: + if not isinstance(item, Agent): + msg = "Expected Agent items from paginated query" + raise TypeError(msg) + agents.append(item) + return agents + + async def get_main_agent_gateway(self, agent: Agent) -> Gateway | None: + if agent.board_id is not None: + return None + return await Gateway.objects.by_id(agent.gateway_id).first(self.session) + + async def ensure_gateway_session( + self, + agent_name: str, + config: GatewayClientConfig, + ) -> tuple[str, str | None]: + session_key = self.build_session_key(agent_name) + try: + await ensure_session(session_key, config=config, label=agent_name) + except OpenClawGatewayError as exc: + self.logger.warning( + "agent.session.ensure_failed agent_name=%s error=%s", + agent_name, + str(exc), + ) + return session_key, str(exc) + return session_key, None + + @classmethod + def with_computed_status(cls, agent: Agent) -> Agent: + now = utcnow() + if agent.status in {"deleting", "updating"}: + return agent + if agent.last_seen_at is None: + agent.status = "provisioning" + elif now - agent.last_seen_at > OFFLINE_AFTER: + agent.status = "offline" + return agent + + @classmethod + def serialize_agent(cls, agent: Agent) -> dict[str, 
object]: + return cls.to_agent_read(cls.with_computed_status(agent)).model_dump(mode="json") + + async def fetch_agent_events( + self, + board_id: UUID | None, + since: datetime, + ) -> list[Agent]: + statement = select(Agent) + if board_id: + statement = statement.where(col(Agent.board_id) == board_id) + statement = statement.where( + or_( + col(Agent.updated_at) >= since, + col(Agent.last_seen_at) >= since, + ), + ).order_by(asc(col(Agent.updated_at))) + return list(await self.session.exec(statement)) + + async def require_user_context(self, user: User | None) -> OrganizationContext: + if user is None: + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED) + member = await get_active_membership(self.session, user) + if member is None: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + organization = await Organization.objects.by_id(member.organization_id).first(self.session) + if organization is None: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + return OrganizationContext(organization=organization, member=member) + + async def require_agent_access( + self, + *, + agent: Agent, + ctx: OrganizationContext, + write: bool, + ) -> None: + if agent.board_id is None: + if not is_org_admin(ctx.member): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + gateway = await self.get_main_agent_gateway(agent) + if gateway is None or gateway.organization_id != ctx.organization.id: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + return + + board = await Board.objects.by_id(agent.board_id).first(self.session) + if board is None or board.organization_id != ctx.organization.id: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + if not await has_board_access(self.session, member=ctx.member, board=board, write=write): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + + @staticmethod + def record_heartbeat(session: AsyncSession, agent: Agent) -> None: + record_activity( + session, + 
event_type="agent.heartbeat", + message=f"Heartbeat received from {agent.name}.", + agent_id=agent.id, + ) + + @staticmethod + def record_instruction_failure( + session: AsyncSession, + agent: Agent, + error: str, + action: str, + ) -> None: + action_label = action.replace("_", " ").capitalize() + record_activity( + session, + event_type=f"agent.{action}.failed", + message=f"{action_label} message failed: {error}", + agent_id=agent.id, + ) + + async def coerce_agent_create_payload( + self, + payload: AgentCreate, + actor: ActorContextLike, + ) -> AgentCreate: + if actor.actor_type == "user": + ctx = await self.require_user_context(actor.user) + if not is_org_admin(ctx.member): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + return payload + + if actor.actor_type == "agent": + if not actor.agent or not actor.agent.is_board_lead: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Only board leads can create agents", + ) + if not actor.agent.board_id: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Board lead must be assigned to a board", + ) + if payload.board_id and payload.board_id != actor.agent.board_id: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Board leads can only create agents in their own board", + ) + return AgentCreate(**{**payload.model_dump(), "board_id": actor.agent.board_id}) + + return payload + + async def ensure_unique_agent_name( + self, + *, + board: Board, + gateway: Gateway, + requested_name: str, + ) -> None: + if not requested_name: + return + + existing = ( + await self.session.exec( + select(Agent) + .where(Agent.board_id == board.id) + .where(col(Agent.name).ilike(requested_name)), + ) + ).first() + if existing: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail="An agent with this name already exists on this board.", + ) + + existing_gateway = ( + await self.session.exec( + select(Agent) + .join(Board, col(Agent.board_id) == 
col(Board.id)) + .where(col(Board.gateway_id) == gateway.id) + .where(col(Agent.name).ilike(requested_name)), + ) + ).first() + if existing_gateway: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail="An agent with this name already exists in this gateway workspace.", + ) + + desired_session_key = self.build_session_key(requested_name) + existing_session_key = ( + await self.session.exec( + select(Agent) + .join(Board, col(Agent.board_id) == col(Board.id)) + .where(col(Board.gateway_id) == gateway.id) + .where(col(Agent.openclaw_session_id) == desired_session_key), + ) + ).first() + if existing_session_key: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=( + "This agent name would collide with an existing workspace " + "session key. Pick a different name." + ), + ) + + async def persist_new_agent( + self, + *, + data: dict[str, Any], + client_config: GatewayClientConfig, + ) -> tuple[Agent, str, str | None]: + agent = Agent.model_validate(data) + agent.status = "provisioning" + raw_token = generate_agent_token() + agent.agent_token_hash = hash_agent_token(raw_token) + if agent.heartbeat_config is None: + agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy() + agent.provision_requested_at = utcnow() + agent.provision_action = "provision" + session_key, session_error = await self.ensure_gateway_session( + agent.name, + client_config, + ) + agent.openclaw_session_id = session_key + self.session.add(agent) + await self.session.commit() + await self.session.refresh(agent) + return agent, raw_token, session_error + + async def record_session_creation( + self, + *, + agent: Agent, + session_error: str | None, + ) -> None: + if session_error: + record_activity( + self.session, + event_type="agent.session.failed", + message=f"Session sync failed for {agent.name}: {session_error}", + agent_id=agent.id, + ) + else: + record_activity( + self.session, + event_type="agent.session.created", + message=f"Session created for 
{agent.name}.", + agent_id=agent.id, + ) + await self.session.commit() + + async def send_wakeup_message( + self, + agent: Agent, + config: GatewayClientConfig, + verb: str = "provisioned", + ) -> None: + session_key = agent.openclaw_session_id or self.build_session_key(agent.name) + await ensure_session(session_key, config=config, label=agent.name) + message = ( + f"Hello {agent.name}. Your workspace has been {verb}.\n\n" + "Start the agent, run BOOT.md, and if BOOTSTRAP.md exists run it once " + "then delete it. Begin heartbeats after startup." + ) + await send_message(message, session_key=session_key, config=config, deliver=True) + + async def provision_new_agent( + self, + *, + agent: Agent, + request: AgentProvisionRequest, + client_config: GatewayClientConfig, + ) -> None: + execution = BoardAgentProvisionExecution( + service=self, + agent=agent, + provision_request=AgentUpdateProvisionRequest( + target=AgentUpdateProvisionTarget( + is_main_agent=False, + board=request.board, + gateway=request.gateway, + client_config=client_config, + ), + raw_token=request.auth_token, + user=request.user, + force_bootstrap=request.options.force_bootstrap, + ), + action="provision", + wakeup_verb="provisioned", + raise_gateway_errors=False, + ) + await execution.execute() + + async def validate_agent_update_inputs( + self, + *, + ctx: OrganizationContext, + updates: dict[str, Any], + make_main: bool | None, + ) -> None: + if make_main and not is_org_admin(ctx.member): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + if "status" in updates: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="status is controlled by agent heartbeat", + ) + if "board_id" in updates and updates["board_id"] is not None: + new_board = await self.require_board(updates["board_id"]) + if new_board.organization_id != ctx.organization.id: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + if not await has_board_access( + self.session, + member=ctx.member, 
+ board=new_board, + write=True, + ): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + + async def apply_agent_update_mutations( + self, + *, + agent: Agent, + updates: dict[str, Any], + make_main: bool | None, + ) -> tuple[Gateway | None, Gateway | None]: + main_gateway = await self.get_main_agent_gateway(agent) + gateway_for_main: Gateway | None = None + + if make_main: + board_source = updates.get("board_id") or agent.board_id + board_for_main = await self.require_board(board_source) + gateway_for_main, _ = await self.require_gateway(board_for_main) + updates["board_id"] = None + updates["gateway_id"] = gateway_for_main.id + agent.is_board_lead = False + agent.openclaw_session_id = GatewayAgentIdentity.session_key(gateway_for_main) + main_gateway = gateway_for_main + elif make_main is not None: + if "board_id" not in updates or updates["board_id"] is None: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail=( + "board_id is required when converting a gateway-main agent " + "to board scope" + ), + ) + board = await self.require_board(updates["board_id"]) + if board.gateway_id is None: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Board gateway_id is required", + ) + updates["gateway_id"] = board.gateway_id + agent.openclaw_session_id = None + + if make_main is None and "board_id" in updates: + board = await self.require_board(updates["board_id"]) + if board.gateway_id is None: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Board gateway_id is required", + ) + updates["gateway_id"] = board.gateway_id + for key, value in updates.items(): + setattr(agent, key, value) + + if make_main is None and main_gateway is not None: + agent.board_id = None + agent.gateway_id = main_gateway.id + agent.is_board_lead = False + if make_main is False and agent.board_id is not None: + board = await self.require_board(agent.board_id) + if board.gateway_id is None: 
+ raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Board gateway_id is required", + ) + agent.gateway_id = board.gateway_id + agent.updated_at = utcnow() + if agent.heartbeat_config is None: + agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy() + self.session.add(agent) + await self.session.commit() + await self.session.refresh(agent) + return main_gateway, gateway_for_main + + async def resolve_agent_update_target( + self, + *, + agent: Agent, + make_main: bool | None, + main_gateway: Gateway | None, + gateway_for_main: Gateway | None, + ) -> AgentUpdateProvisionTarget: + if make_main: + if gateway_for_main is None: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Gateway agent requires a gateway configuration", + ) + return AgentUpdateProvisionTarget( + is_main_agent=True, + board=None, + gateway=gateway_for_main, + client_config=self.gateway_client_config(gateway_for_main), + ) + + if make_main is None and agent.board_id is None and main_gateway is not None: + return AgentUpdateProvisionTarget( + is_main_agent=True, + board=None, + gateway=main_gateway, + client_config=self.gateway_client_config(main_gateway), + ) + + if agent.board_id is None: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="board_id is required for non-main agents", + ) + board = await self.require_board(agent.board_id) + gateway, client_config = await self.require_gateway(board) + return AgentUpdateProvisionTarget( + is_main_agent=False, + board=board, + gateway=gateway, + client_config=client_config, + ) + + async def ensure_agent_update_session( + self, + *, + agent: Agent, + client_config: GatewayClientConfig, + ) -> None: + session_key = agent.openclaw_session_id or self.build_session_key(agent.name) + try: + await ensure_session(session_key, config=client_config, label=agent.name) + if not agent.openclaw_session_id: + agent.openclaw_session_id = session_key + 
self.session.add(agent) + await self.session.commit() + await self.session.refresh(agent) + except OpenClawGatewayError as exc: + self.record_instruction_failure(self.session, agent, str(exc), "update") + await self.session.commit() + + @staticmethod + def mark_agent_update_pending(agent: Agent) -> str: + raw_token = generate_agent_token() + agent.agent_token_hash = hash_agent_token(raw_token) + agent.provision_requested_at = utcnow() + agent.provision_action = "update" + agent.status = "updating" + return raw_token + + async def provision_updated_agent( + self, + *, + agent: Agent, + request: AgentUpdateProvisionRequest, + ) -> None: + execution: AbstractProvisionExecution + if request.target.is_main_agent: + execution = MainAgentProvisionExecution( + service=self, + agent=agent, + provision_request=request, + action="update", + wakeup_verb="updated", + raise_gateway_errors=True, + ) + else: + execution = BoardAgentProvisionExecution( + service=self, + agent=agent, + provision_request=request, + action="update", + wakeup_verb="updated", + raise_gateway_errors=True, + ) + await execution.execute() + + @staticmethod + def heartbeat_lookup_statement(payload: AgentHeartbeatCreate) -> SelectOfScalar[Agent]: + statement = Agent.objects.filter_by(name=payload.name).statement + if payload.board_id is not None: + statement = statement.where(Agent.board_id == payload.board_id) + return statement + + async def create_agent_from_heartbeat( + self, + *, + payload: AgentHeartbeatCreate, + actor: ActorContextLike, + ) -> Agent: + if actor.actor_type == "agent": + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + if actor.actor_type == "user": + ctx = await self.require_user_context(actor.user) + if not is_org_admin(ctx.member): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + + board = await self.require_board( + payload.board_id, + user=actor.user, + write=True, + ) + gateway, client_config = await self.require_gateway(board) + data: dict[str, Any] = { 
+ "name": payload.name, + "board_id": board.id, + "gateway_id": gateway.id, + "heartbeat_config": DEFAULT_HEARTBEAT_CONFIG.copy(), + } + agent, raw_token, session_error = await self.persist_new_agent( + data=data, + client_config=client_config, + ) + await self.record_session_creation( + agent=agent, + session_error=session_error, + ) + await self.provision_new_agent( + agent=agent, + request=AgentProvisionRequest( + board=board, + gateway=gateway, + auth_token=raw_token, + user=actor.user, + options=ProvisionOptions(action="provision"), + ), + client_config=client_config, + ) + return agent + + async def handle_existing_user_heartbeat_agent( + self, + *, + agent: Agent, + user: User | None, + ) -> None: + ctx = await self.require_user_context(user) + await self.require_agent_access(agent=agent, ctx=ctx, write=True) + + if agent.agent_token_hash is not None: + return + + raw_token = generate_agent_token() + agent.agent_token_hash = hash_agent_token(raw_token) + if agent.heartbeat_config is None: + agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy() + agent.provision_requested_at = utcnow() + agent.provision_action = "provision" + self.session.add(agent) + await self.session.commit() + await self.session.refresh(agent) + board = await self.require_board( + str(agent.board_id) if agent.board_id else None, + user=user, + write=True, + ) + gateway, client_config = await self.require_gateway(board) + await self.provision_new_agent( + agent=agent, + request=AgentProvisionRequest( + board=board, + gateway=gateway, + auth_token=raw_token, + user=user, + options=ProvisionOptions(action="provision"), + ), + client_config=client_config, + ) + + async def ensure_heartbeat_session_key( + self, + *, + agent: Agent, + actor: ActorContextLike, + ) -> None: + if agent.openclaw_session_id: + return + board = await self.require_board( + str(agent.board_id) if agent.board_id else None, + user=actor.user if actor.actor_type == "user" else None, + write=actor.actor_type == "user", 
+ ) + _, client_config = await self.require_gateway(board) + session_key, session_error = await self.ensure_gateway_session( + agent.name, + client_config, + ) + agent.openclaw_session_id = session_key + self.session.add(agent) + await self.record_session_creation( + agent=agent, + session_error=session_error, + ) + + async def commit_heartbeat( + self, + *, + agent: Agent, + status_value: str | None, + ) -> AgentRead: + if status_value: + agent.status = status_value + elif agent.status == "provisioning": + agent.status = "online" + agent.last_seen_at = utcnow() + agent.updated_at = utcnow() + self.record_heartbeat(self.session, agent) + self.session.add(agent) + await self.session.commit() + await self.session.refresh(agent) + return self.to_agent_read(self.with_computed_status(agent)) + + async def list_agents( + self, + *, + board_id: UUID | None, + gateway_id: UUID | None, + ctx: OrganizationContext, + ) -> LimitOffsetPage[AgentRead]: + board_ids = await list_accessible_board_ids(self.session, member=ctx.member, write=False) + if board_id is not None and board_id not in set(board_ids): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + base_filters: list[ColumnElement[bool]] = [] + if board_ids: + base_filters.append(col(Agent.board_id).in_(board_ids)) + if is_org_admin(ctx.member): + gateways = await Gateway.objects.filter_by( + organization_id=ctx.organization.id, + ).all(self.session) + gateway_ids = [gateway.id for gateway in gateways] + if gateway_ids: + base_filters.append( + (col(Agent.gateway_id).in_(gateway_ids)) & (col(Agent.board_id).is_(None)), + ) + if base_filters: + if len(base_filters) == 1: + statement = select(Agent).where(base_filters[0]) + else: + statement = select(Agent).where(or_(*base_filters)) + else: + statement = select(Agent).where(col(Agent.id).is_(None)) + if board_id is not None: + statement = statement.where(col(Agent.board_id) == board_id) + if gateway_id is not None: + gateway = await 
Gateway.objects.by_id(gateway_id).first(self.session) + if gateway is None or gateway.organization_id != ctx.organization.id: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + gateway_board_ids = select(Board.id).where(col(Board.gateway_id) == gateway_id) + statement = statement.where( + or_( + col(Agent.board_id).in_(gateway_board_ids), + (col(Agent.gateway_id) == gateway_id) & (col(Agent.board_id).is_(None)), + ), + ) + statement = statement.order_by(col(Agent.created_at).desc()) + + def _transform(items: Sequence[Any]) -> Sequence[Any]: + agents = self.coerce_agent_items(items) + return [self.to_agent_read(self.with_computed_status(agent)) for agent in agents] + + return await paginate(self.session, statement, transformer=_transform) + + async def stream_agents( + self, + *, + request: Request, + board_id: UUID | None, + since: str | None, + ctx: OrganizationContext, + ) -> EventSourceResponse: + since_dt = self.parse_since(since) or utcnow() + last_seen = since_dt + board_ids = await list_accessible_board_ids(self.session, member=ctx.member, write=False) + allowed_ids = set(board_ids) + if board_id is not None and board_id not in allowed_ids: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + + async def event_generator() -> AsyncIterator[dict[str, str]]: + nonlocal last_seen + while True: + if await request.is_disconnected(): + break + async with async_session_maker() as stream_session: + stream_service = AgentLifecycleService(stream_session) + stream_service.logger = self.logger + if board_id is not None: + agents = await stream_service.fetch_agent_events( + board_id, + last_seen, + ) + elif allowed_ids: + agents = await stream_service.fetch_agent_events(None, last_seen) + agents = [agent for agent in agents if agent.board_id in allowed_ids] + else: + agents = [] + for agent in agents: + updated_at = agent.updated_at or agent.last_seen_at or utcnow() + last_seen = max(updated_at, last_seen) + payload = {"agent": 
self.serialize_agent(agent)} + yield {"event": "agent", "data": json.dumps(payload)} + await asyncio.sleep(2) + + return EventSourceResponse(event_generator(), ping=15) + + async def create_agent( + self, + *, + payload: AgentCreate, + actor: ActorContextLike, + ) -> AgentRead: + self.logger.log( + 5, + "agent.create.start actor_type=%s board_id=%s", + actor.actor_type, + payload.board_id, + ) + payload = await self.coerce_agent_create_payload(payload, actor) + + board = await self.require_board( + payload.board_id, + user=actor.user if actor.actor_type == "user" else None, + write=actor.actor_type == "user", + ) + gateway, client_config = await self.require_gateway(board) + data = payload.model_dump() + data["gateway_id"] = gateway.id + requested_name = (data.get("name") or "").strip() + await self.ensure_unique_agent_name( + board=board, + gateway=gateway, + requested_name=requested_name, + ) + agent, raw_token, session_error = await self.persist_new_agent( + data=data, + client_config=client_config, + ) + await self.record_session_creation( + agent=agent, + session_error=session_error, + ) + provision_request = AgentProvisionRequest( + board=board, + gateway=gateway, + auth_token=raw_token, + user=actor.user if actor.actor_type == "user" else None, + options=ProvisionOptions(action="provision"), + ) + await self.provision_new_agent( + agent=agent, + request=provision_request, + client_config=client_config, + ) + self.logger.info("agent.create.success agent_id=%s board_id=%s", agent.id, board.id) + return self.to_agent_read(self.with_computed_status(agent)) + + async def get_agent( + self, + *, + agent_id: str, + ctx: OrganizationContext, + ) -> AgentRead: + agent = await Agent.objects.by_id(agent_id).first(self.session) + if agent is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + await self.require_agent_access(agent=agent, ctx=ctx, write=False) + return self.to_agent_read(self.with_computed_status(agent)) + + async def update_agent( + 
self, + *, + agent_id: str, + payload: AgentUpdate, + options: AgentUpdateOptions, + ) -> AgentRead: + self.logger.log(5, "agent.update.start agent_id=%s force=%s", agent_id, options.force) + agent = await Agent.objects.by_id(agent_id).first(self.session) + if agent is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + await self.require_agent_access(agent=agent, ctx=options.context, write=True) + updates = payload.model_dump(exclude_unset=True) + make_main = updates.pop("is_gateway_main", None) + await self.validate_agent_update_inputs( + ctx=options.context, + updates=updates, + make_main=make_main, + ) + if not updates and not options.force and make_main is None: + return self.to_agent_read(self.with_computed_status(agent)) + main_gateway, gateway_for_main = await self.apply_agent_update_mutations( + agent=agent, + updates=updates, + make_main=make_main, + ) + target = await self.resolve_agent_update_target( + agent=agent, + make_main=make_main, + main_gateway=main_gateway, + gateway_for_main=gateway_for_main, + ) + await self.ensure_agent_update_session( + agent=agent, + client_config=target.client_config, + ) + raw_token = self.mark_agent_update_pending(agent) + self.session.add(agent) + await self.session.commit() + await self.session.refresh(agent) + provision_request = AgentUpdateProvisionRequest( + target=target, + raw_token=raw_token, + user=options.user, + force_bootstrap=options.force, + ) + await self.provision_updated_agent( + agent=agent, + request=provision_request, + ) + self.logger.info("agent.update.success agent_id=%s", agent.id) + return self.to_agent_read(self.with_computed_status(agent)) + + async def heartbeat_agent( + self, + *, + agent_id: str, + payload: AgentHeartbeat, + actor: ActorContextLike, + ) -> AgentRead: + self.logger.log( + 5, "agent.heartbeat.start agent_id=%s actor_type=%s", agent_id, actor.actor_type + ) + agent = await Agent.objects.by_id(agent_id).first(self.session) + if agent is None: + raise 
HTTPException(status_code=status.HTTP_404_NOT_FOUND) + if actor.actor_type == "agent" and actor.agent and actor.agent.id != agent.id: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + if actor.actor_type == "user": + ctx = await self.require_user_context(actor.user) + if not is_org_admin(ctx.member): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + await self.require_agent_access(agent=agent, ctx=ctx, write=True) + return await self.commit_heartbeat( + agent=agent, + status_value=payload.status, + ) + + async def heartbeat_or_create_agent( + self, + *, + payload: AgentHeartbeatCreate, + actor: ActorContextLike, + ) -> AgentRead: + self.logger.log( + 5, + "agent.heartbeat_or_create.start actor_type=%s name=%s board_id=%s", + actor.actor_type, + payload.name, + payload.board_id, + ) + if actor.actor_type == "agent" and actor.agent: + return await self.heartbeat_agent( + agent_id=str(actor.agent.id), + payload=AgentHeartbeat(status=payload.status), + actor=actor, + ) + + agent = (await self.session.exec(self.heartbeat_lookup_statement(payload))).first() + if agent is None: + agent = await self.create_agent_from_heartbeat( + payload=payload, + actor=actor, + ) + elif actor.actor_type == "user": + await self.handle_existing_user_heartbeat_agent( + agent=agent, + user=actor.user, + ) + elif actor.actor_type == "agent" and actor.agent and actor.agent.id != agent.id: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + + await self.ensure_heartbeat_session_key( + agent=agent, + actor=actor, + ) + return await self.commit_heartbeat( + agent=agent, + status_value=payload.status, + ) + + async def delete_agent( + self, + *, + agent_id: str, + ctx: OrganizationContext, + ) -> OkResponse: + self.logger.log(5, "agent.delete.start agent_id=%s", agent_id) + agent = await Agent.objects.by_id(agent_id).first(self.session) + if agent is None: + return OkResponse() + await self.require_agent_access(agent=agent, ctx=ctx, write=True) + + board = 
await self.require_board(str(agent.board_id) if agent.board_id else None) + gateway, client_config = await self.require_gateway(board) + try: + workspace_path = await cleanup_agent(agent, gateway) + except OpenClawGatewayError as exc: + self.record_instruction_failure(self.session, agent, str(exc), "delete") + await self.session.commit() + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail=f"Gateway cleanup failed: {exc}", + ) from exc + except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover + self.record_instruction_failure(self.session, agent, str(exc), "delete") + await self.session.commit() + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Workspace cleanup failed: {exc}", + ) from exc + + record_activity( + self.session, + event_type="agent.delete.direct", + message=f"Deleted agent {agent.name}.", + agent_id=None, + ) + now = utcnow() + await crud.update_where( + self.session, + Task, + col(Task.assigned_agent_id) == agent.id, + col(Task.status) == "in_progress", + assigned_agent_id=None, + status="inbox", + in_progress_at=None, + updated_at=now, + commit=False, + ) + await crud.update_where( + self.session, + Task, + col(Task.assigned_agent_id) == agent.id, + col(Task.status) != "in_progress", + assigned_agent_id=None, + updated_at=now, + commit=False, + ) + await crud.update_where( + self.session, + ActivityEvent, + col(ActivityEvent.agent_id) == agent.id, + agent_id=None, + commit=False, + ) + await self.session.delete(agent) + await self.session.commit() + + try: + main_session = GatewayAgentIdentity.session_key(gateway) + if main_session and workspace_path: + cleanup_message = ( + "Cleanup request for deleted agent.\n\n" + f"Agent name: {agent.name}\n" + f"Agent id: {agent.id}\n" + f"Workspace path: {workspace_path}\n\n" + "Actions:\n" + "1) Remove the workspace directory.\n" + "2) Reply NO_REPLY.\n" + ) + await ensure_session(main_session, config=client_config, label="Gateway 
Agent") + await send_message( + cleanup_message, + session_key=main_session, + config=client_config, + deliver=False, + ) + except (OSError, OpenClawGatewayError, ValueError): + pass + self.logger.info("agent.delete.success agent_id=%s", agent_id) + return OkResponse() + + +class AbstractGatewayMessagingService(ABC): + """Shared gateway messaging primitives with retry semantics.""" + + def __init__(self, session: AsyncSession) -> None: + self._session = session + self._logger = logging.getLogger(__name__) + + @property + def session(self) -> AsyncSession: + return self._session + + @session.setter + def session(self, value: AsyncSession) -> None: + self._session = value + + @property + def logger(self) -> logging.Logger: + return self._logger + + @logger.setter + def logger(self, value: logging.Logger) -> None: + self._logger = value + + @staticmethod + async def _with_gateway_retry(fn: Callable[[], Awaitable[_T]]) -> _T: + return await _with_coordination_gateway_retry(fn) + + async def _dispatch_gateway_message( + self, + *, + session_key: str, + config: GatewayClientConfig, + agent_name: str, + message: str, + deliver: bool, + ) -> None: + async def _do_send() -> bool: + await send_gateway_agent_message( + session_key=session_key, + config=config, + agent_name=agent_name, + message=message, + deliver=deliver, + ) + return True + + await self._with_gateway_retry(_do_send) + + +class GatewayCoordinationService(AbstractGatewayMessagingService): + """Gateway-main and lead coordination workflows used by agent-facing routes.""" + + @staticmethod + def _build_gateway_lead_message( + *, + board: Board, + actor_agent_name: str, + kind: str, + content: str, + correlation_id: str | None, + reply_tags: list[str] | None, + reply_source: str | None, + ) -> str: + base_url = settings.base_url or "http://localhost:8000" + header = "GATEWAY MAIN QUESTION" if kind == "question" else "GATEWAY MAIN HANDOFF" + correlation = correlation_id.strip() if correlation_id else "" + 
correlation_line = f"Correlation ID: {correlation}\n" if correlation else "" + tags_json = json.dumps(reply_tags or ["gateway_main", "lead_reply"]) + source = reply_source or "lead_to_gateway_main" + return ( + f"{header}\n" + f"Board: {board.name}\n" + f"Board ID: {board.id}\n" + f"From agent: {actor_agent_name}\n" + f"{correlation_line}\n" + f"{content.strip()}\n\n" + "Reply to the gateway agent by writing a NON-chat memory item on this board:\n" + f"POST {base_url}/api/v1/agent/boards/{board.id}/memory\n" + f'Body: {{"content":"...","tags":{tags_json},"source":"{source}"}}\n' + "Do NOT reply in OpenClaw chat." + ) + + async def require_gateway_main_actor( + self, + actor_agent: Agent, + ) -> tuple[Gateway, GatewayClientConfig]: + detail = "Only the dedicated gateway agent may call this endpoint." + if actor_agent.board_id is not None: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=detail) + gateway = await Gateway.objects.by_id(actor_agent.gateway_id).first(self.session) + if gateway is None: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=detail) + if actor_agent.openclaw_session_id != GatewayAgentIdentity.session_key(gateway): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=detail) + if not gateway.url: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Gateway url is required", + ) + return gateway, GatewayClientConfig(url=gateway.url, token=gateway.token) + + async def require_gateway_board( + self, + *, + gateway: Gateway, + board_id: UUID | str, + ) -> Board: + board = await Board.objects.by_id(board_id).first(self.session) + if board is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Board not found") + if board.gateway_id != gateway.id: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN) + return board + + async def _board_agent_or_404( + self, + *, + board: Board, + agent_id: str, + ) -> Agent: + target = await 
Agent.objects.by_id(agent_id).first(self.session) + if target is None or (target.board_id and target.board_id != board.id): + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + return target + + @staticmethod + def _gateway_file_content(payload: object) -> str | None: + if isinstance(payload, str): + return payload + if isinstance(payload, dict): + content = payload.get("content") + if isinstance(content, str): + return content + file_obj = payload.get("file") + if isinstance(file_obj, dict): + nested = file_obj.get("content") + if isinstance(nested, str): + return nested + return None + + async def nudge_board_agent( + self, + *, + board: Board, + actor_agent: Agent, + target_agent_id: str, + message: str, + correlation_id: str | None = None, + ) -> None: + trace_id = resolve_trace_id(correlation_id, prefix="coord.nudge") + self.logger.log( + 5, + "gateway.coordination.nudge.start trace_id=%s board_id=%s actor_agent_id=%s " + "target_agent_id=%s", + trace_id, + board.id, + actor_agent.id, + target_agent_id, + ) + target = await self._board_agent_or_404(board=board, agent_id=target_agent_id) + if not target.openclaw_session_id: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Target agent has no session key", + ) + _gateway, config = await require_gateway_config_for_board(self.session, board) + try: + await self._dispatch_gateway_message( + session_key=target.openclaw_session_id or "", + config=config, + agent_name=target.name, + message=message, + deliver=True, + ) + except (OpenClawGatewayError, TimeoutError) as exc: + record_activity( + self.session, + event_type="agent.nudge.failed", + message=f"Nudge failed for {target.name}: {exc}", + agent_id=actor_agent.id, + ) + await self.session.commit() + self.logger.error( + "gateway.coordination.nudge.failed trace_id=%s board_id=%s actor_agent_id=%s " + "target_agent_id=%s error=%s", + trace_id, + board.id, + actor_agent.id, + target_agent_id, + str(exc), + ) + raise 
map_gateway_error_to_http_exception(GatewayOperation.NUDGE_AGENT, exc) from exc + except Exception as exc: # pragma: no cover - defensive guard + self.logger.critical( + "gateway.coordination.nudge.failed_unexpected trace_id=%s board_id=%s " + "actor_agent_id=%s target_agent_id=%s error_type=%s error=%s", + trace_id, + board.id, + actor_agent.id, + target_agent_id, + exc.__class__.__name__, + str(exc), + ) + raise + record_activity( + self.session, + event_type="agent.nudge.sent", + message=f"Nudge sent to {target.name}.", + agent_id=actor_agent.id, + ) + await self.session.commit() + self.logger.info( + "gateway.coordination.nudge.success trace_id=%s board_id=%s actor_agent_id=%s " + "target_agent_id=%s", + trace_id, + board.id, + actor_agent.id, + target_agent_id, + ) + + async def get_agent_soul( + self, + *, + board: Board, + target_agent_id: str, + correlation_id: str | None = None, + ) -> str: + trace_id = resolve_trace_id(correlation_id, prefix="coord.soul.read") + self.logger.log( + 5, + "gateway.coordination.soul_read.start trace_id=%s board_id=%s target_agent_id=%s", + trace_id, + board.id, + target_agent_id, + ) + target = await self._board_agent_or_404(board=board, agent_id=target_agent_id) + _gateway, config = await require_gateway_config_for_board(self.session, board) + try: + + async def _do_get() -> object: + return await openclaw_call( + "agents.files.get", + {"agentId": _agent_key(target), "name": "SOUL.md"}, + config=config, + ) + + payload = await self._with_gateway_retry(_do_get) + except (OpenClawGatewayError, TimeoutError) as exc: + self.logger.error( + "gateway.coordination.soul_read.failed trace_id=%s board_id=%s " + "target_agent_id=%s error=%s", + trace_id, + board.id, + target_agent_id, + str(exc), + ) + raise map_gateway_error_to_http_exception(GatewayOperation.SOUL_READ, exc) from exc + except Exception as exc: # pragma: no cover - defensive guard + self.logger.critical( + "gateway.coordination.soul_read.failed_unexpected trace_id=%s 
board_id=%s " + "target_agent_id=%s error_type=%s error=%s", + trace_id, + board.id, + target_agent_id, + exc.__class__.__name__, + str(exc), + ) + raise + content = self._gateway_file_content(payload) + if content is None: + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail="Invalid gateway response", + ) + self.logger.info( + "gateway.coordination.soul_read.success trace_id=%s board_id=%s target_agent_id=%s", + trace_id, + board.id, + target_agent_id, + ) + return content + + async def update_agent_soul( + self, + *, + board: Board, + target_agent_id: str, + content: str, + reason: str | None, + source_url: str | None, + actor_agent_id: UUID, + correlation_id: str | None = None, + ) -> None: + trace_id = resolve_trace_id(correlation_id, prefix="coord.soul.write") + self.logger.log( + 5, + "gateway.coordination.soul_write.start trace_id=%s board_id=%s target_agent_id=%s " + "actor_agent_id=%s", + trace_id, + board.id, + target_agent_id, + actor_agent_id, + ) + target = await self._board_agent_or_404(board=board, agent_id=target_agent_id) + normalized_content = content.strip() + if not normalized_content: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="content is required", + ) + + target.soul_template = normalized_content + target.updated_at = utcnow() + self.session.add(target) + await self.session.commit() + + _gateway, config = await require_gateway_config_for_board(self.session, board) + try: + + async def _do_set() -> object: + return await openclaw_call( + "agents.files.set", + { + "agentId": _agent_key(target), + "name": "SOUL.md", + "content": normalized_content, + }, + config=config, + ) + + await self._with_gateway_retry(_do_set) + except (OpenClawGatewayError, TimeoutError) as exc: + self.logger.error( + "gateway.coordination.soul_write.failed trace_id=%s board_id=%s " + "target_agent_id=%s actor_agent_id=%s error=%s", + trace_id, + board.id, + target_agent_id, + actor_agent_id, + str(exc), + ) 
+ raise map_gateway_error_to_http_exception(GatewayOperation.SOUL_WRITE, exc) from exc + except Exception as exc: # pragma: no cover - defensive guard + self.logger.critical( + "gateway.coordination.soul_write.failed_unexpected trace_id=%s board_id=%s " + "target_agent_id=%s actor_agent_id=%s error_type=%s error=%s", + trace_id, + board.id, + target_agent_id, + actor_agent_id, + exc.__class__.__name__, + str(exc), + ) + raise + + reason_text = (reason or "").strip() + source_url_text = (source_url or "").strip() + note = f"SOUL.md updated for {target.name}." + if reason_text: + note = f"{note} Reason: {reason_text}" + if source_url_text: + note = f"{note} Source: {source_url_text}" + record_activity( + self.session, + event_type="agent.soul.updated", + message=note, + agent_id=actor_agent_id, + ) + await self.session.commit() + self.logger.info( + "gateway.coordination.soul_write.success trace_id=%s board_id=%s target_agent_id=%s " + "actor_agent_id=%s", + trace_id, + board.id, + target_agent_id, + actor_agent_id, + ) + + async def ask_user_via_gateway_main( + self, + *, + board: Board, + payload: GatewayMainAskUserRequest, + actor_agent: Agent, + ) -> GatewayMainAskUserResponse: + trace_id = resolve_trace_id(payload.correlation_id, prefix="coord.ask_user") + self.logger.log( + 5, + "gateway.coordination.ask_user.start trace_id=%s board_id=%s actor_agent_id=%s", + trace_id, + board.id, + actor_agent.id, + ) + gateway, config = await require_gateway_config_for_board(self.session, board) + main_session_key = GatewayAgentIdentity.session_key(gateway) + + correlation = payload.correlation_id.strip() if payload.correlation_id else "" + correlation_line = f"Correlation ID: {correlation}\n" if correlation else "" + preferred_channel = (payload.preferred_channel or "").strip() + channel_line = f"Preferred channel: {preferred_channel}\n" if preferred_channel else "" + tags = payload.reply_tags or ["gateway_main", "user_reply"] + tags_json = json.dumps(tags) + reply_source = 
payload.reply_source or "user_via_gateway_main" + base_url = settings.base_url or "http://localhost:8000" + message = ( + "LEAD REQUEST: ASK USER\n" + f"Board: {board.name}\n" + f"Board ID: {board.id}\n" + f"From lead: {actor_agent.name}\n" + f"{correlation_line}" + f"{channel_line}\n" + f"{payload.content.strip()}\n\n" + "Please reach the user via your configured OpenClaw channel(s) " + "(Slack/SMS/etc).\n" + "If you cannot reach them there, post the question in Mission Control " + "board chat as a fallback.\n\n" + "When you receive the answer, reply in Mission Control by writing a " + "NON-chat memory item on this board:\n" + f"POST {base_url}/api/v1/agent/boards/{board.id}/memory\n" + f'Body: {{"content":"","tags":{tags_json},"source":"{reply_source}"}}\n' + "Do NOT reply in OpenClaw chat." + ) + try: + await self._dispatch_gateway_message( + session_key=main_session_key, + config=config, + agent_name="Gateway Agent", + message=message, + deliver=True, + ) + except (OpenClawGatewayError, TimeoutError) as exc: + record_activity( + self.session, + event_type="gateway.lead.ask_user.failed", + message=f"Lead user question failed for {board.name}: {exc}", + agent_id=actor_agent.id, + ) + await self.session.commit() + self.logger.error( + "gateway.coordination.ask_user.failed trace_id=%s board_id=%s actor_agent_id=%s " + "error=%s", + trace_id, + board.id, + actor_agent.id, + str(exc), + ) + raise map_gateway_error_to_http_exception( + GatewayOperation.ASK_USER_DISPATCH, + exc, + ) from exc + except Exception as exc: # pragma: no cover - defensive guard + self.logger.critical( + "gateway.coordination.ask_user.failed_unexpected trace_id=%s board_id=%s " + "actor_agent_id=%s error_type=%s error=%s", + trace_id, + board.id, + actor_agent.id, + exc.__class__.__name__, + str(exc), + ) + raise + + record_activity( + self.session, + event_type="gateway.lead.ask_user.sent", + message=f"Lead requested user info via gateway agent for board: {board.name}.", + 
agent_id=actor_agent.id, + ) + main_agent = await Agent.objects.filter_by(gateway_id=gateway.id, board_id=None).first( + self.session, + ) + await self.session.commit() + self.logger.info( + "gateway.coordination.ask_user.success trace_id=%s board_id=%s actor_agent_id=%s " + "main_agent_id=%s", + trace_id, + board.id, + actor_agent.id, + main_agent.id if main_agent else None, + ) + return GatewayMainAskUserResponse( + board_id=board.id, + main_agent_id=main_agent.id if main_agent else None, + main_agent_name=main_agent.name if main_agent else None, + ) + + async def _ensure_and_message_board_lead( + self, + *, + gateway: Gateway, + config: GatewayClientConfig, + board: Board, + message: str, + ) -> tuple[Agent, bool]: + lead, lead_created = await ensure_board_lead_agent( + self.session, + request=LeadAgentRequest( + board=board, + gateway=gateway, + config=config, + user=None, + options=LeadAgentOptions(action="provision"), + ), + ) + if not lead.openclaw_session_id: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Lead agent has no session key", + ) + await self._dispatch_gateway_message( + session_key=lead.openclaw_session_id or "", + config=config, + agent_name=lead.name, + message=message, + deliver=False, + ) + return lead, lead_created + + async def message_gateway_board_lead( + self, + *, + actor_agent: Agent, + board_id: UUID, + payload: GatewayLeadMessageRequest, + ) -> GatewayLeadMessageResponse: + trace_id = resolve_trace_id(payload.correlation_id, prefix="coord.lead_message") + self.logger.log( + 5, + "gateway.coordination.lead_message.start trace_id=%s board_id=%s actor_agent_id=%s", + trace_id, + board_id, + actor_agent.id, + ) + gateway, config = await self.require_gateway_main_actor(actor_agent) + board = await self.require_gateway_board(gateway=gateway, board_id=board_id) + message = self._build_gateway_lead_message( + board=board, + actor_agent_name=actor_agent.name, + kind=payload.kind, + 
content=payload.content, + correlation_id=payload.correlation_id, + reply_tags=payload.reply_tags, + reply_source=payload.reply_source, + ) + + try: + lead, lead_created = await self._ensure_and_message_board_lead( + gateway=gateway, + config=config, + board=board, + message=message, + ) + except (OpenClawGatewayError, TimeoutError) as exc: + record_activity( + self.session, + event_type="gateway.main.lead_message.failed", + message=f"Lead message failed for {board.name}: {exc}", + agent_id=actor_agent.id, + ) + await self.session.commit() + self.logger.error( + "gateway.coordination.lead_message.failed trace_id=%s board_id=%s " + "actor_agent_id=%s error=%s", + trace_id, + board.id, + actor_agent.id, + str(exc), + ) + raise map_gateway_error_to_http_exception( + GatewayOperation.LEAD_MESSAGE_DISPATCH, + exc, + ) from exc + except Exception as exc: # pragma: no cover - defensive guard + self.logger.critical( + "gateway.coordination.lead_message.failed_unexpected trace_id=%s board_id=%s " + "actor_agent_id=%s error_type=%s error=%s", + trace_id, + board.id, + actor_agent.id, + exc.__class__.__name__, + str(exc), + ) + raise + + record_activity( + self.session, + event_type="gateway.main.lead_message.sent", + message=f"Sent {payload.kind} to lead for board: {board.name}.", + agent_id=actor_agent.id, + ) + await self.session.commit() + self.logger.info( + "gateway.coordination.lead_message.success trace_id=%s board_id=%s " + "actor_agent_id=%s lead_agent_id=%s", + trace_id, + board.id, + actor_agent.id, + lead.id, + ) + return GatewayLeadMessageResponse( + board_id=board.id, + lead_agent_id=lead.id, + lead_agent_name=lead.name, + lead_created=lead_created, + ) + + async def broadcast_gateway_lead_message( + self, + *, + actor_agent: Agent, + payload: GatewayLeadBroadcastRequest, + ) -> GatewayLeadBroadcastResponse: + trace_id = resolve_trace_id(payload.correlation_id, prefix="coord.lead_broadcast") + self.logger.log( + 5, + "gateway.coordination.lead_broadcast.start 
trace_id=%s actor_agent_id=%s", + trace_id, + actor_agent.id, + ) + gateway, config = await self.require_gateway_main_actor(actor_agent) + statement = ( + select(Board) + .where(col(Board.gateway_id) == gateway.id) + .order_by(col(Board.created_at).desc()) + ) + if payload.board_ids: + statement = statement.where(col(Board.id).in_(payload.board_ids)) + boards = list(await self.session.exec(statement)) + + results: list[GatewayLeadBroadcastBoardResult] = [] + sent = 0 + failed = 0 + + for board in boards: + message = self._build_gateway_lead_message( + board=board, + actor_agent_name=actor_agent.name, + kind=payload.kind, + content=payload.content, + correlation_id=payload.correlation_id, + reply_tags=payload.reply_tags, + reply_source=payload.reply_source, + ) + try: + lead, _lead_created = await self._ensure_and_message_board_lead( + gateway=gateway, + config=config, + board=board, + message=message, + ) + board_result = GatewayLeadBroadcastBoardResult( + board_id=board.id, + lead_agent_id=lead.id, + lead_agent_name=lead.name, + ok=True, + ) + sent += 1 + except (HTTPException, OpenClawGatewayError, TimeoutError, ValueError) as exc: + board_result = GatewayLeadBroadcastBoardResult( + board_id=board.id, + ok=False, + error=map_gateway_error_message( + GatewayOperation.LEAD_BROADCAST_DISPATCH, + exc, + ), + ) + failed += 1 + results.append(board_result) + + record_activity( + self.session, + event_type="gateway.main.lead_broadcast.sent", + message=f"Broadcast {payload.kind} to {sent} board leads (failed: {failed}).", + agent_id=actor_agent.id, + ) + await self.session.commit() + self.logger.info( + "gateway.coordination.lead_broadcast.success trace_id=%s actor_agent_id=%s sent=%s " + "failed=%s", + trace_id, + actor_agent.id, + sent, + failed, + ) + return GatewayLeadBroadcastResponse( + ok=True, + sent=sent, + failed=failed, + results=results, + ) + + +class BoardOnboardingMessagingService(AbstractGatewayMessagingService): + """Gateway message dispatch helpers for 
onboarding routes.""" + + async def dispatch_start_prompt( + self, + *, + board: Board, + prompt: str, + correlation_id: str | None = None, + ) -> str: + trace_id = resolve_trace_id(correlation_id, prefix="onboarding.start") + self.logger.log( + 5, + "gateway.onboarding.start_dispatch.start trace_id=%s board_id=%s", + trace_id, + board.id, + ) + gateway, config = await require_gateway_config_for_board(self.session, board) + session_key = GatewayAgentIdentity.session_key(gateway) + try: + await self._dispatch_gateway_message( + session_key=session_key, + config=config, + agent_name="Gateway Agent", + message=prompt, + deliver=False, + ) + except (OpenClawGatewayError, TimeoutError) as exc: + self.logger.error( + "gateway.onboarding.start_dispatch.failed trace_id=%s board_id=%s error=%s", + trace_id, + board.id, + str(exc), + ) + raise map_gateway_error_to_http_exception( + GatewayOperation.ONBOARDING_START_DISPATCH, + exc, + ) from exc + except Exception as exc: # pragma: no cover - defensive guard + self.logger.critical( + "gateway.onboarding.start_dispatch.failed_unexpected trace_id=%s board_id=%s " + "error_type=%s error=%s", + trace_id, + board.id, + exc.__class__.__name__, + str(exc), + ) + raise + self.logger.info( + "gateway.onboarding.start_dispatch.success trace_id=%s board_id=%s session_key=%s", + trace_id, + board.id, + session_key, + ) + return session_key + + async def dispatch_answer( + self, + *, + board: Board, + onboarding: BoardOnboardingSession, + answer_text: str, + correlation_id: str | None = None, + ) -> None: + trace_id = resolve_trace_id(correlation_id, prefix="onboarding.answer") + self.logger.log( + 5, + "gateway.onboarding.answer_dispatch.start trace_id=%s board_id=%s onboarding_id=%s", + trace_id, + board.id, + onboarding.id, + ) + _gateway, config = await require_gateway_config_for_board(self.session, board) + try: + await self._dispatch_gateway_message( + session_key=onboarding.session_key, + config=config, + agent_name="Gateway 
Agent", + message=answer_text, + deliver=False, + ) + except (OpenClawGatewayError, TimeoutError) as exc: + self.logger.error( + "gateway.onboarding.answer_dispatch.failed trace_id=%s board_id=%s " + "onboarding_id=%s error=%s", + trace_id, + board.id, + onboarding.id, + str(exc), + ) + raise map_gateway_error_to_http_exception( + GatewayOperation.ONBOARDING_ANSWER_DISPATCH, + exc, + ) from exc + except Exception as exc: # pragma: no cover - defensive guard + self.logger.critical( + "gateway.onboarding.answer_dispatch.failed_unexpected trace_id=%s board_id=%s " + "onboarding_id=%s error_type=%s error=%s", + trace_id, + board.id, + onboarding.id, + exc.__class__.__name__, + str(exc), + ) + raise + self.logger.info( + "gateway.onboarding.answer_dispatch.success trace_id=%s board_id=%s onboarding_id=%s", + trace_id, + board.id, + onboarding.id, + ) diff --git a/backend/app/services/openclaw/shared.py b/backend/app/services/openclaw/shared.py new file mode 100644 index 00000000..7f7ac214 --- /dev/null +++ b/backend/app/services/openclaw/shared.py @@ -0,0 +1,98 @@ +"""Shared OpenClaw lifecycle primitives.""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING +from uuid import UUID, uuid4 + +from fastapi import HTTPException, status + +from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig +from app.integrations.openclaw_gateway import ensure_session, send_message +from app.models.boards import Board +from app.models.gateways import Gateway +from app.services.openclaw.constants import ( + _GATEWAY_AGENT_PREFIX, + _GATEWAY_AGENT_SUFFIX, + _GATEWAY_OPENCLAW_AGENT_PREFIX, +) + +if TYPE_CHECKING: + from sqlmodel.ext.asyncio.session import AsyncSession + + +class GatewayAgentIdentity: + """Naming and identity rules for Mission Control gateway-main agents.""" + + @classmethod + def session_key_for_id(cls, gateway_id: UUID) -> str: + return f"{_GATEWAY_AGENT_PREFIX}{gateway_id}{_GATEWAY_AGENT_SUFFIX}" + + 
async def optional_gateway_config_for_board(
    session: AsyncSession,
    board: Board,
) -> GatewayClientConfig | None:
    """Build a gateway client config for ``board`` when one can be resolved.

    Returns ``None`` when the board has no ``gateway_id``, when no gateway is
    found for that id, or when the gateway has an empty ``url``. Only the
    stored configuration is inspected; no connection is attempted here.
    """
    gateway_id = board.gateway_id
    if gateway_id is None:
        return None
    gateway = await Gateway.objects.by_id(gateway_id).first(session)
    if gateway is not None and gateway.url:
        return GatewayClientConfig(url=gateway.url, token=gateway.token)
    return None


async def require_gateway_config_for_board(
    session: AsyncSession,
    board: Board,
) -> tuple[Gateway, GatewayClientConfig]:
    """Resolve the board's gateway and a client config for it.

    Returns:
        A ``(gateway, client_config)`` pair.

    Raises:
        HTTPException: 422 when the board has no ``gateway_id``, or when the
            gateway is missing or has an empty ``url``.
    """
    gateway_id = board.gateway_id
    if gateway_id is None:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="Board is not attached to a gateway",
        )
    gateway = await Gateway.objects.by_id(gateway_id).first(session)
    if gateway is not None and gateway.url:
        client_config = GatewayClientConfig(url=gateway.url, token=gateway.token)
        return gateway, client_config
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
        detail="Gateway is not configured for this board",
    )


async def send_gateway_agent_message(
    *,
    session_key: str,
    config: GatewayClientConfig,
    agent_name: str,
    message: str,
    deliver: bool = False,
) -> None:
    """Ensure the agent session, then send ``message`` to it.

    ``agent_name`` is passed to ``ensure_session`` as the session label;
    ``deliver`` is forwarded to ``send_message`` unchanged.
    """
    # Order matters: the session is ensured before the message is sent.
    await ensure_session(session_key, config=config, label=agent_name)
    await send_message(
        message,
        session_key=session_key,
        config=config,
        deliver=deliver,
    )


def resolve_trace_id(correlation_id: str | None, *, prefix: str) -> str:
    """Return the stripped correlation id, or a generated ``prefix``-scoped id.

    When ``correlation_id`` is ``None`` or blank, the fallback is
    ``"<prefix>:"`` followed by the first 12 hex characters of a fresh UUID4.
    """
    cleaned = correlation_id.strip() if correlation_id else ""
    return cleaned or f"{prefix}:{uuid4().hex[:12]}"


logger = logging.getLogger(__name__)
- "no route to host", - "network is unreachable", - "host is down", - "name or service not known", - "received 1012", - "service restart", - "http 503", - "http 502", - "http 504", - "temporar", - "timeout", - "timed out", - "connection closed", - "connection reset", -) - -T = TypeVar("T") -_SECURE_RANDOM = random.SystemRandom() -_RUNTIME_TYPE_REFERENCES = (Awaitable, Callable, AsyncSession, Gateway, User, UUID) - - -@dataclass(frozen=True) -class GatewayTemplateSyncOptions: - """Runtime options controlling gateway template synchronization.""" - - user: User | None - include_main: bool = True - reset_sessions: bool = False - rotate_tokens: bool = False - force_bootstrap: bool = False - board_id: UUID | None = None - - -@dataclass(frozen=True) -class _SyncContext: - """Shared state passed to sync helper functions.""" - - session: AsyncSession - gateway: Gateway - config: GatewayClientConfig - backoff: _GatewayBackoff - options: GatewayTemplateSyncOptions - - -def _slugify(value: str) -> str: - slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") - return slug or uuid4().hex - - -def _is_transient_gateway_error(exc: Exception) -> bool: - if not isinstance(exc, OpenClawGatewayError): - return False - message = str(exc).lower() - if not message: - return False - if any(marker in message for marker in _NON_TRANSIENT_GATEWAY_ERROR_MARKERS): - return False - return ("503" in message and "websocket" in message) or any( - marker in message for marker in _TRANSIENT_GATEWAY_ERROR_MARKERS - ) - - -def _gateway_timeout_message(exc: OpenClawGatewayError) -> str: - return "Gateway unreachable after 10 minutes (template sync timeout). 
" f"Last error: {exc}" - - -class _GatewayBackoff: - def __init__( - self, - *, - timeout_s: float = 10 * 60, - base_delay_s: float = 0.75, - max_delay_s: float = 30.0, - jitter: float = 0.2, - ) -> None: - self._timeout_s = timeout_s - self._base_delay_s = base_delay_s - self._max_delay_s = max_delay_s - self._jitter = jitter - self._delay_s = base_delay_s - - def reset(self) -> None: - self._delay_s = self._base_delay_s - - @staticmethod - async def _attempt( - fn: Callable[[], Awaitable[T]], - ) -> tuple[T | None, OpenClawGatewayError | None]: - try: - return await fn(), None - except OpenClawGatewayError as exc: - return None, exc - - async def run(self, fn: Callable[[], Awaitable[T]]) -> T: - # Use per-call deadlines so long-running syncs can still tolerate a later - # gateway restart without having an already-expired retry window. - deadline_s = asyncio.get_running_loop().time() + self._timeout_s - while True: - value, error = await self._attempt(fn) - if error is not None: - exc = error - if not _is_transient_gateway_error(exc): - raise exc - now = asyncio.get_running_loop().time() - remaining = deadline_s - now - if remaining <= 0: - raise TimeoutError(_gateway_timeout_message(exc)) from exc - - sleep_s = min(self._delay_s, remaining) - if self._jitter: - sleep_s *= 1.0 + _SECURE_RANDOM.uniform( - -self._jitter, - self._jitter, - ) - sleep_s = max(0.0, min(sleep_s, remaining)) - await asyncio.sleep(sleep_s) - self._delay_s = min(self._delay_s * 2.0, self._max_delay_s) - continue - self.reset() - if value is None: - msg = "Gateway retry produced no value without an error" - raise RuntimeError(msg) - return value - - -async def _with_gateway_retry( - fn: Callable[[], Awaitable[T]], - *, - backoff: _GatewayBackoff, -) -> T: - return await backoff.run(fn) - - -def _gateway_agent_id(agent: Agent) -> str: - session_key = agent.openclaw_session_id or "" - if session_key.startswith("agent:"): - parts = session_key.split(":") - if len(parts) >= SESSION_KEY_PARTS_MIN 
and parts[1]: - return parts[1] - return _slugify(agent.name) - - -def _parse_tools_md(content: str) -> dict[str, str]: - values: dict[str, str] = {} - for raw in content.splitlines(): - line = raw.strip() - if not line or line.startswith("#"): - continue - match = _TOOLS_KV_RE.match(line) - if not match: - continue - values[match.group("key")] = match.group("value").strip() - return values - - -async def _get_agent_file( - *, - agent_gateway_id: str, - name: str, - config: GatewayClientConfig, - backoff: _GatewayBackoff | None = None, -) -> str | None: - try: - - async def _do_get() -> object: - return await openclaw_call( - "agents.files.get", - {"agentId": agent_gateway_id, "name": name}, - config=config, - ) - - payload = await (backoff.run(_do_get) if backoff else _do_get()) - except OpenClawGatewayError: - return None - if isinstance(payload, str): - return payload - if isinstance(payload, dict): - content = payload.get("content") - if isinstance(content, str): - return content - file_obj = payload.get("file") - if isinstance(file_obj, dict): - nested = file_obj.get("content") - if isinstance(nested, str): - return nested - return None - - -async def _get_existing_auth_token( - *, - agent_gateway_id: str, - config: GatewayClientConfig, - backoff: _GatewayBackoff | None = None, -) -> str | None: - tools = await _get_agent_file( - agent_gateway_id=agent_gateway_id, - name="TOOLS.md", - config=config, - backoff=backoff, - ) - if not tools: - return None - values = _parse_tools_md(tools) - token = values.get("AUTH_TOKEN") - if not token: - return None - token = token.strip() - return token or None - - -async def _paused_board_ids(session: AsyncSession, board_ids: list[UUID]) -> set[UUID]: - if not board_ids: - return set() - - commands = {"/pause", "/resume"} - statement = ( - select(BoardMemory.board_id, BoardMemory.content) - .where(col(BoardMemory.board_id).in_(board_ids)) - .where(col(BoardMemory.is_chat).is_(True)) - 
.where(func.lower(func.trim(col(BoardMemory.content))).in_(commands)) - .order_by(col(BoardMemory.board_id), col(BoardMemory.created_at).desc()) - # Postgres: DISTINCT ON (board_id) to get latest command per board. - .distinct(col(BoardMemory.board_id)) - ) - - paused: set[UUID] = set() - for board_id, content in await session.exec(statement): - cmd = (content or "").strip().lower() - if cmd == "/pause": - paused.add(board_id) - return paused - - -def _append_sync_error( - result: GatewayTemplatesSyncResult, - *, - message: str, - agent: Agent | None = None, - board: Board | None = None, -) -> None: - result.errors.append( - GatewayTemplatesSyncError( - agent_id=agent.id if agent else None, - agent_name=agent.name if agent else None, - board_id=board.id if board else None, - message=message, - ), - ) - - -async def _rotate_agent_token(session: AsyncSession, agent: Agent) -> str: - token = generate_agent_token() - agent.agent_token_hash = hash_agent_token(token) - agent.updated_at = utcnow() - session.add(agent) - await session.commit() - await session.refresh(agent) - return token - - -async def _ping_gateway(ctx: _SyncContext, result: GatewayTemplatesSyncResult) -> bool: - try: - - async def _do_ping() -> object: - return await openclaw_call("agents.list", config=ctx.config) - - await ctx.backoff.run(_do_ping) - except (TimeoutError, OpenClawGatewayError) as exc: - _append_sync_error(result, message=str(exc)) - return False - else: - return True - - -def _base_result( - gateway: Gateway, - *, - include_main: bool, - reset_sessions: bool, -) -> GatewayTemplatesSyncResult: - return GatewayTemplatesSyncResult( - gateway_id=gateway.id, - include_main=include_main, - reset_sessions=reset_sessions, - agents_updated=0, - agents_skipped=0, - main_updated=False, - ) - - -def _boards_by_id( - boards: list[Board], - *, - board_id: UUID | None, -) -> dict[UUID, Board] | None: - boards_by_id = {board.id: board for board in boards} - if board_id is None: - return boards_by_id - 
board = boards_by_id.get(board_id) - if board is None: - return None - return {board_id: board} - - -async def _resolve_agent_auth_token( - ctx: _SyncContext, - result: GatewayTemplatesSyncResult, - agent: Agent, - board: Board | None, - *, - agent_gateway_id: str, -) -> tuple[str | None, bool]: - try: - auth_token = await _get_existing_auth_token( - agent_gateway_id=agent_gateway_id, - config=ctx.config, - backoff=ctx.backoff, - ) - except TimeoutError as exc: - _append_sync_error(result, agent=agent, board=board, message=str(exc)) - return None, True - - if not auth_token: - if not ctx.options.rotate_tokens: - result.agents_skipped += 1 - _append_sync_error( - result, - agent=agent, - board=board, - message=( - "Skipping agent: unable to read AUTH_TOKEN from TOOLS.md " - "(run with rotate_tokens=true to re-key)." - ), - ) - return None, False - auth_token = await _rotate_agent_token(ctx.session, agent) - - if agent.agent_token_hash and not verify_agent_token( - auth_token, - agent.agent_token_hash, - ): - if ctx.options.rotate_tokens: - auth_token = await _rotate_agent_token(ctx.session, agent) - else: - _append_sync_error( - result, - agent=agent, - board=board, - message=( - "Warning: AUTH_TOKEN in TOOLS.md does not match backend " - "token hash (agent auth may be broken)." 
- ), - ) - return auth_token, False - - -async def _sync_one_agent( - ctx: _SyncContext, - result: GatewayTemplatesSyncResult, - agent: Agent, - board: Board, -) -> bool: - auth_token, fatal = await _resolve_agent_auth_token( - ctx, - result, - agent, - board, - agent_gateway_id=_gateway_agent_id(agent), - ) - if fatal: - return True - if not auth_token: - return False - try: - - async def _do_provision() -> None: - await provision_agent( - agent, - AgentProvisionRequest( - board=board, - gateway=ctx.gateway, - auth_token=auth_token, - user=ctx.options.user, - options=ProvisionOptions( - action="update", - force_bootstrap=ctx.options.force_bootstrap, - reset_session=ctx.options.reset_sessions, - ), - ), - ) - - await _with_gateway_retry(_do_provision, backoff=ctx.backoff) - result.agents_updated += 1 - except TimeoutError as exc: # pragma: no cover - gateway/network dependent - result.agents_skipped += 1 - _append_sync_error(result, agent=agent, board=board, message=str(exc)) - return True - except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover - result.agents_skipped += 1 - _append_sync_error( - result, - agent=agent, - board=board, - message=f"Failed to sync templates: {exc}", - ) - return False - else: - return False - - -async def _sync_main_agent( - ctx: _SyncContext, - result: GatewayTemplatesSyncResult, -) -> bool: - main_session_key = gateway_agent_session_key(ctx.gateway) - main_agent = ( - await Agent.objects.all() - .filter(col(Agent.gateway_id) == ctx.gateway.id) - .filter(col(Agent.board_id).is_(None)) - .first(ctx.session) - ) - if main_agent is None: - _append_sync_error( - result, - message=("Gateway agent record not found; " "skipping gateway agent template sync."), - ) - return True - main_gateway_agent_id = gateway_openclaw_agent_id(ctx.gateway) - - token, fatal = await _resolve_agent_auth_token( - ctx, - result, - main_agent, - board=None, - agent_gateway_id=main_gateway_agent_id, - ) - if fatal: - return True - if not token: - 
_append_sync_error( - result, - agent=main_agent, - message="Skipping gateway agent: unable to read AUTH_TOKEN from TOOLS.md.", - ) - return True - stop_sync = False - try: - - async def _do_provision_main() -> None: - await provision_main_agent( - main_agent, - MainAgentProvisionRequest( - gateway=ctx.gateway, - auth_token=token, - user=ctx.options.user, - session_key=main_session_key, - options=ProvisionOptions( - action="update", - force_bootstrap=ctx.options.force_bootstrap, - reset_session=ctx.options.reset_sessions, - ), - ), - ) - - await _with_gateway_retry(_do_provision_main, backoff=ctx.backoff) - except TimeoutError as exc: # pragma: no cover - gateway/network dependent - _append_sync_error(result, agent=main_agent, message=str(exc)) - stop_sync = True - except (OSError, RuntimeError, ValueError) as exc: # pragma: no cover - _append_sync_error( - result, - agent=main_agent, - message=f"Failed to sync gateway agent templates: {exc}", - ) - else: - result.main_updated = True - return stop_sync - - -async def sync_gateway_templates( - session: AsyncSession, - gateway: Gateway, - options: GatewayTemplateSyncOptions, -) -> GatewayTemplatesSyncResult: - """Synchronize AGENTS/TOOLS/etc templates to gateway-connected agents.""" - result = _base_result( - gateway, - include_main=options.include_main, - reset_sessions=options.reset_sessions, - ) - if not gateway.url: - _append_sync_error( - result, - message="Gateway URL is not configured for this gateway.", - ) - return result - - ctx = _SyncContext( - session=session, - gateway=gateway, - config=GatewayClientConfig(url=gateway.url, token=gateway.token), - backoff=_GatewayBackoff(timeout_s=10 * 60), - options=options, - ) - if not await _ping_gateway(ctx, result): - return result - - boards = await Board.objects.filter_by(gateway_id=gateway.id).all(session) - boards_by_id = _boards_by_id(boards, board_id=options.board_id) - if boards_by_id is None: - _append_sync_error( - result, - message="Board does not belong 
to this gateway.", - ) - return result - paused_board_ids = await _paused_board_ids(session, list(boards_by_id.keys())) - if boards_by_id: - agents = await ( - Agent.objects.by_field_in("board_id", list(boards_by_id.keys())) - .order_by(col(Agent.created_at).asc()) - .all(session) - ) - else: - agents = [] - - stop_sync = False - for agent in agents: - board = boards_by_id.get(agent.board_id) if agent.board_id is not None else None - if board is None: - result.agents_skipped += 1 - _append_sync_error( - result, - agent=agent, - message="Skipping agent: board not found for agent.", - ) - continue - if board.id in paused_board_ids: - result.agents_skipped += 1 - continue - stop_sync = await _sync_one_agent(ctx, result, agent, board) - if stop_sync: - break - - if not stop_sync and options.include_main: - await _sync_main_agent(ctx, result) - return result diff --git a/backend/scripts/seed_demo.py b/backend/scripts/seed_demo.py index 2cc5189d..649f6ad8 100644 --- a/backend/scripts/seed_demo.py +++ b/backend/scripts/seed_demo.py @@ -18,7 +18,7 @@ async def run() -> None: from app.models.boards import Board from app.models.gateways import Gateway from app.models.users import User - from app.services.gateway_agents import gateway_agent_session_key + from app.services.openclaw import GatewayAgentIdentity await init_db() async with async_session_maker() as session: @@ -30,7 +30,7 @@ async def run() -> None: main_session_key="placeholder", workspace_root=str(demo_workspace_root), ) - gateway.main_session_key = gateway_agent_session_key(gateway) + gateway.main_session_key = GatewayAgentIdentity.session_key(gateway) session.add(gateway) await session.commit() await session.refresh(gateway) diff --git a/backend/scripts/sync_gateway_templates.py b/backend/scripts/sync_gateway_templates.py index 9e1e8a16..ab758df9 100644 --- a/backend/scripts/sync_gateway_templates.py +++ b/backend/scripts/sync_gateway_templates.py @@ -52,7 +52,7 @@ def _parse_args() -> argparse.Namespace: async 
def _run() -> int: from app.db.session import async_session_maker from app.models.gateways import Gateway - from app.services.template_sync import GatewayTemplateSyncOptions, sync_gateway_templates + from app.services.openclaw import GatewayTemplateSyncOptions, sync_gateway_templates args = _parse_args() gateway_id = UUID(args.gateway_id) diff --git a/backend/tests/test_agent_provisioning_utils.py b/backend/tests/test_agent_provisioning_utils.py index 2b8b0644..611c6eca 100644 --- a/backend/tests/test_agent_provisioning_utils.py +++ b/backend/tests/test_agent_provisioning_utils.py @@ -7,11 +7,8 @@ from uuid import UUID, uuid4 import pytest -from app.services import agent_provisioning -from app.services.gateway_agents import ( - gateway_agent_session_key_for_id, - gateway_openclaw_agent_id_for_id, -) +from app.services.openclaw import GatewayAgentIdentity +from app.services.openclaw import provisioning as agent_provisioning def test_slugify_normalizes_and_trims(): @@ -81,7 +78,7 @@ class _GatewayStub: @pytest.mark.asyncio async def test_provision_main_agent_uses_dedicated_openclaw_agent_id(monkeypatch): gateway_id = uuid4() - session_key = gateway_agent_session_key_for_id(gateway_id) + session_key = GatewayAgentIdentity.session_key_for_id(gateway_id) gateway = _GatewayStub( id=gateway_id, name="Acme", @@ -149,6 +146,6 @@ async def test_provision_main_agent_uses_dedicated_openclaw_agent_id(monkeypatch ), ) - expected_agent_id = gateway_openclaw_agent_id_for_id(gateway_id) + expected_agent_id = GatewayAgentIdentity.openclaw_agent_id_for_id(gateway_id) assert captured["patched_agent_id"] == expected_agent_id assert captured["files_index_agent_id"] == expected_agent_id diff --git a/backend/tests/test_lifecycle_services.py b/backend/tests/test_lifecycle_services.py new file mode 100644 index 00000000..67936b05 --- /dev/null +++ b/backend/tests/test_lifecycle_services.py @@ -0,0 +1,258 @@ +# ruff: noqa: S101 +"""Unit tests for lifecycle coordination and onboarding 
@dataclass
class _FakeSession:
    """Minimal stand-in for a DB session that records adds and counts commits."""

    # Number of times commit() has been awaited.
    committed: int = 0
    # Every object passed to add(), in call order.
    added: list[object] = field(default_factory=list)

    def add(self, value: object) -> None:
        """Record ``value`` instead of staging it for persistence."""
        self.added.append(value)

    async def commit(self) -> None:
        """Count the commit so tests can assert how many happened."""
        self.committed += 1


@dataclass
class _AgentStub:
    """Lightweight agent stand-in carrying only the fields set by these tests."""

    id: UUID
    name: str
    openclaw_session_id: str | None = None
    board_id: UUID | None = None


@dataclass
class _BoardStub:
    """Lightweight board stand-in carrying only the fields set by these tests."""

    id: UUID
    gateway_id: UUID | None
    name: str


@pytest.mark.asyncio
async def test_gateway_coordination_nudge_success(monkeypatch: pytest.MonkeyPatch) -> None:
    """A successful nudge sends one delivered message to the target and commits once."""
    session = _FakeSession()
    service = lifecycle.GatewayCoordinationService(session)  # type: ignore[arg-type]
    board = _BoardStub(id=uuid4(), gateway_id=uuid4(), name="Roadmap")
    actor = _AgentStub(id=uuid4(), name="Lead Agent", board_id=board.id)
    target = _AgentStub(
        id=uuid4(),
        name="Worker Agent",
        openclaw_session_id="agent:worker:main",
        board_id=board.id,
    )
    # Keyword arguments of every call made to the patched send function.
    captured: list[dict[str, Any]] = []

    async def _fake_board_agent_or_404(
        self: lifecycle.GatewayCoordinationService,
        *,
        board: object,
        agent_id: str,
    ) -> _AgentStub:
        _ = (self, board, agent_id)
        return target

    async def _fake_require_gateway_config_for_board(
        _session: object,
        _board: object,
    ) -> tuple[object, GatewayClientConfig]:
        gateway = SimpleNamespace(id=uuid4(), url="ws://gateway.example/ws")
        return gateway, GatewayClientConfig(url="ws://gateway.example/ws", token=None)

    async def _fake_send_gateway_agent_message(**kwargs: Any) -> dict[str, bool]:
        captured.append(kwargs)
        return {"ok": True}

    # Replace agent lookup, gateway resolution and the outbound send with fakes.
    monkeypatch.setattr(
        lifecycle.GatewayCoordinationService,
        "_board_agent_or_404",
        _fake_board_agent_or_404,
    )
    monkeypatch.setattr(
        lifecycle,
        "require_gateway_config_for_board",
        _fake_require_gateway_config_for_board,
    )
    monkeypatch.setattr(
        lifecycle,
        "send_gateway_agent_message",
        _fake_send_gateway_agent_message,
    )

    await service.nudge_board_agent(
        board=board,  # type: ignore[arg-type]
        actor_agent=actor,  # type: ignore[arg-type]
        target_agent_id=str(target.id),
        message="Please run BOOT.md",
        correlation_id="nudge-corr-id",
    )

    assert len(captured) == 1
    assert captured[0]["session_key"] == "agent:worker:main"
    assert captured[0]["agent_name"] == "Worker Agent"
    assert captured[0]["deliver"] is True
    assert session.committed == 1


@pytest.mark.asyncio
async def test_gateway_coordination_nudge_maps_gateway_error(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A gateway error during a nudge surfaces as HTTP 502; one commit still happens."""
    session = _FakeSession()
    service = lifecycle.GatewayCoordinationService(session)  # type: ignore[arg-type]
    board = _BoardStub(id=uuid4(), gateway_id=uuid4(), name="Roadmap")
    actor = _AgentStub(id=uuid4(), name="Lead Agent", board_id=board.id)
    target = _AgentStub(
        id=uuid4(),
        name="Worker Agent",
        openclaw_session_id="agent:worker:main",
        board_id=board.id,
    )

    async def _fake_board_agent_or_404(
        self: lifecycle.GatewayCoordinationService,
        *,
        board: object,
        agent_id: str,
    ) -> _AgentStub:
        _ = (self, board, agent_id)
        return target

    async def _fake_require_gateway_config_for_board(
        _session: object,
        _board: object,
    ) -> tuple[object, GatewayClientConfig]:
        gateway = SimpleNamespace(id=uuid4(), url="ws://gateway.example/ws")
        return gateway, GatewayClientConfig(url="ws://gateway.example/ws", token=None)

    async def _fake_send_gateway_agent_message(**_kwargs: Any) -> None:
        raise OpenClawGatewayError("dial tcp: connection refused")

    # Same fakes as the success case, except the send always fails.
    monkeypatch.setattr(
        lifecycle.GatewayCoordinationService,
        "_board_agent_or_404",
        _fake_board_agent_or_404,
    )
    monkeypatch.setattr(
        lifecycle,
        "require_gateway_config_for_board",
        _fake_require_gateway_config_for_board,
    )
    monkeypatch.setattr(
        lifecycle,
        "send_gateway_agent_message",
        _fake_send_gateway_agent_message,
    )

    with pytest.raises(HTTPException) as exc_info:
        await service.nudge_board_agent(
            board=board,  # type: ignore[arg-type]
            actor_agent=actor,  # type: ignore[arg-type]
            target_agent_id=str(target.id),
            message="Please run BOOT.md",
            correlation_id="nudge-corr-id",
        )

    assert exc_info.value.status_code == status.HTTP_502_BAD_GATEWAY
    assert "Gateway nudge failed:" in str(exc_info.value.detail)
    # The failure path is still expected to commit exactly once.
    assert session.committed == 1


@pytest.mark.asyncio
async def test_board_onboarding_dispatch_start_returns_session_key(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Start dispatch returns the gateway session key and sends one undelivered message."""
    session = _FakeSession()
    service = lifecycle.BoardOnboardingMessagingService(session)  # type: ignore[arg-type]
    gateway_id = uuid4()
    board = _BoardStub(id=uuid4(), gateway_id=gateway_id, name="Roadmap")
    # Keyword arguments of every call made to the patched send function.
    captured: list[dict[str, Any]] = []

    async def _fake_require_gateway_config_for_board(
        _session: object,
        _board: object,
    ) -> tuple[object, GatewayClientConfig]:
        # Reuse gateway_id so the expected session key can be derived below.
        gateway = SimpleNamespace(id=gateway_id, url="ws://gateway.example/ws")
        return gateway, GatewayClientConfig(url="ws://gateway.example/ws", token=None)

    async def _fake_send_gateway_agent_message(**kwargs: Any) -> dict[str, bool]:
        captured.append(kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        lifecycle,
        "require_gateway_config_for_board",
        _fake_require_gateway_config_for_board,
    )
    monkeypatch.setattr(
        lifecycle,
        "send_gateway_agent_message",
        _fake_send_gateway_agent_message,
    )

    session_key = await service.dispatch_start_prompt(
        board=board,  # type: ignore[arg-type]
        prompt="BOARD ONBOARDING REQUEST",
        correlation_id="onboarding-corr-id",
    )

    assert session_key == lifecycle.GatewayAgentIdentity.session_key_for_id(gateway_id)
    assert len(captured) == 1
    assert captured[0]["agent_name"] == "Gateway Agent"
    assert captured[0]["deliver"] is False


@pytest.mark.asyncio
async def test_board_onboarding_dispatch_answer_maps_timeout_error(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A timeout while dispatching an onboarding answer surfaces as HTTP 502."""
    session = _FakeSession()
    service = lifecycle.BoardOnboardingMessagingService(session)  # type: ignore[arg-type]
    board = _BoardStub(id=uuid4(), gateway_id=uuid4(), name="Roadmap")
    # Only the attributes set here are provided on the onboarding stand-in.
    onboarding = SimpleNamespace(id=uuid4(), session_key="agent:gateway-main:main")

    async def _fake_require_gateway_config_for_board(
        _session: object,
        _board: object,
    ) -> tuple[object, GatewayClientConfig]:
        gateway = SimpleNamespace(id=uuid4(), url="ws://gateway.example/ws")
        return gateway, GatewayClientConfig(url="ws://gateway.example/ws", token=None)

    async def _fake_send_gateway_agent_message(**_kwargs: Any) -> None:
        raise TimeoutError("gateway timeout")

    monkeypatch.setattr(
        lifecycle,
        "require_gateway_config_for_board",
        _fake_require_gateway_config_for_board,
    )
    monkeypatch.setattr(
        lifecycle,
        "send_gateway_agent_message",
        _fake_send_gateway_agent_message,
    )

    with pytest.raises(HTTPException) as exc_info:
        await service.dispatch_answer(
            board=board,  # type: ignore[arg-type]
            onboarding=onboarding,
            answer_text="I prefer concise updates.",
            correlation_id="onboarding-answer-corr-id",
        )

    assert exc_info.value.status_code == status.HTTP_502_BAD_GATEWAY
    assert "Gateway onboarding answer dispatch failed:" in str(exc_info.value.detail)