feat: add souls directory integration with search and fetch functionality

This commit is contained in:
Abhimanyu Saharan
2026-02-08 00:46:10 +05:30
parent 527cc13c63
commit 460d4adddf
8 changed files with 395 additions and 2 deletions

View File

@@ -1,11 +1,12 @@
from __future__ import annotations
import re
from collections.abc import Sequence
from typing import Any, cast
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlmodel import col, select
from sqlmodel import SQLModel, col, select
from sqlmodel.ext.asyncio.session import AsyncSession
from app.api import agents as agents_api
@@ -19,7 +20,7 @@ from app.core.config import settings
from app.db.pagination import paginate
from app.db.session import get_session
from app.integrations.openclaw_gateway import GatewayConfig as GatewayClientConfig
from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, send_message
from app.integrations.openclaw_gateway import OpenClawGatewayError, ensure_session, openclaw_call, send_message
from app.models.activity_events import ActivityEvent
from app.models.agents import Agent
from app.models.approvals import Approval
@@ -62,6 +63,26 @@ from app.services.task_dependencies import (
# Router for the agent-facing endpoints; every route below is mounted under "/agent".
router = APIRouter(prefix="/agent", tags=["agent"])
_AGENT_SESSION_PREFIX = "agent:"
def _gateway_agent_id(agent: Agent) -> str:
session_key = agent.openclaw_session_id or ""
if session_key.startswith(_AGENT_SESSION_PREFIX):
parts = session_key.split(":")
if len(parts) >= 2 and parts[1]:
return parts[1]
# Fall back to a stable slug derived from name (matches provisioning behavior).
value = agent.name.lower().strip()
value = re.sub(r"[^a-z0-9]+", "-", value).strip("-")
return value or str(agent.id)
class SoulUpdateRequest(SQLModel):
    """Request body accepted by the SOUL.md update endpoint."""

    # Replacement SOUL.md text; the endpoint strips it and rejects it if empty.
    content: str
    # Optional URL; when non-blank it is appended to the activity note as "Source: ...".
    source_url: str | None = None
    # Optional explanation; when non-blank it is appended to the activity note as "Reason: ...".
    reason: str | None = None
def _actor(agent_ctx: AgentAuthContext) -> ActorContext:
    """Build an ``ActorContext`` of type ``"agent"`` for the authenticated agent."""
    return ActorContext(actor_type="agent", agent=agent_ctx.agent)
@@ -492,6 +513,90 @@ async def agent_heartbeat(
)
@router.get("/boards/{board_id}/agents/{agent_id}/soul", response_model=str)
async def get_agent_soul(
    agent_id: str,
    board: Board = Depends(get_board_or_404),
    session: AsyncSession = Depends(get_session),
    agent_ctx: AgentAuthContext = Depends(get_agent_auth_context),
) -> str:
    """Return the ``SOUL.md`` content of an agent, read through the gateway.

    The caller must pass ``_guard_board_access`` for ``board`` and must be
    either a board lead or the target agent itself.

    Raises:
        HTTPException: 403 when the caller is neither a board lead nor the
            target agent; 404 when the agent does not exist or is assigned to
            a different board; 502 when the gateway call fails or its payload
            has no recognizable string content.
    """
    _guard_board_access(agent_ctx, board)
    caller = agent_ctx.agent
    if not (caller.is_board_lead or str(caller.id) == agent_id):
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN)

    target = await session.get(Agent, agent_id)
    if target is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND)
    # NOTE(review): an agent with no board_id passes this check for any board —
    # confirm that unassigned agents are meant to be readable here.
    if target.board_id and target.board_id != board.id:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND)

    config = await _gateway_config(session, board)
    request_params = {"agentId": _gateway_agent_id(target), "name": "SOUL.md"}
    try:
        payload = await openclaw_call("agents.files.get", request_params, config=config)
    except OpenClawGatewayError as exc:
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(exc)) from exc

    # Accepted payload shapes, in priority order: a bare string,
    # {"content": str}, or {"file": {"content": str}}.
    if isinstance(payload, str):
        return payload
    if isinstance(payload, dict):
        file_obj = payload.get("file")
        nested = file_obj.get("content") if isinstance(file_obj, dict) else None
        for candidate in (payload.get("content"), nested):
            if isinstance(candidate, str):
                return candidate
    raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="Invalid gateway response")
@router.put("/boards/{board_id}/agents/{agent_id}/soul", response_model=OkResponse)
async def update_agent_soul(
    agent_id: str,
    payload: SoulUpdateRequest,
    board: Board = Depends(get_board_or_404),
    session: AsyncSession = Depends(get_session),
    agent_ctx: AgentAuthContext = Depends(get_agent_auth_context),
) -> OkResponse:
    """Replace an agent's ``SOUL.md`` through the gateway and log the change.

    Only board leads may call this. The stripped ``payload.content`` is
    written via ``agents.files.set``; afterwards an ``agent.soul.updated``
    activity is recorded under the calling agent and the session is committed.

    Raises:
        HTTPException: 403 when the caller is not a board lead; 404 when the
            agent does not exist or is assigned to a different board; 422
            when the content is empty after stripping; 502 when the gateway
            call fails.
    """
    _guard_board_access(agent_ctx, board)
    if not agent_ctx.agent.is_board_lead:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN)

    target = await session.get(Agent, agent_id)
    if target is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND)
    # NOTE(review): an agent with no board_id passes this check for any board —
    # confirm that unassigned agents are meant to be writable here.
    if target.board_id and target.board_id != board.id:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND)

    config = await _gateway_config(session, board)
    gateway_id = _gateway_agent_id(target)

    new_soul = payload.content.strip()
    if not new_soul:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="content is required",
        )

    try:
        await openclaw_call(
            "agents.files.set",
            {"agentId": gateway_id, "name": "SOUL.md", "content": new_soul},
            config=config,
        )
    except OpenClawGatewayError as exc:
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(exc)) from exc

    # Assemble the activity message; blank reason/source are left out.
    note_parts = [f"SOUL.md updated for {target.name}."]
    reason = (payload.reason or "").strip()
    if reason:
        note_parts.append(f"Reason: {reason}")
    source_url = (payload.source_url or "").strip()
    if source_url:
        note_parts.append(f"Source: {source_url}")

    record_activity(
        session,
        event_type="agent.soul.updated",
        message=" ".join(note_parts),
        agent_id=agent_ctx.agent.id,
    )
    await session.commit()
    return OkResponse()
@router.post(
"/boards/{board_id}/gateway/main/ask-user",
response_model=GatewayMainAskUserResponse,

View File

@@ -0,0 +1,74 @@
from __future__ import annotations
import re
from fastapi import APIRouter, Depends, HTTPException, Query, status
from app.api.deps import ActorContext, require_admin_or_agent
from app.schemas.souls_directory import (
SoulsDirectoryMarkdownResponse,
SoulsDirectorySearchResponse,
SoulsDirectorySoulRef,
)
from app.services import souls_directory
# Router for the souls-directory endpoints; every route below is mounted under "/souls-directory".
router = APIRouter(prefix="/souls-directory", tags=["souls-directory"])
_SAFE_SEGMENT_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_-]*$")
_SAFE_SLUG_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_-]*$")
def _validate_segment(value: str, *, field: str) -> str:
cleaned = value.strip().strip("/")
if not cleaned:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail=f"{field} is required",
)
if field == "handle":
ok = bool(_SAFE_SEGMENT_RE.match(cleaned))
else:
ok = bool(_SAFE_SLUG_RE.match(cleaned))
if not ok:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail=f"{field} contains unsupported characters",
)
return cleaned
@router.get("/search", response_model=SoulsDirectorySearchResponse)
async def search(
    q: str = Query(default="", min_length=0),
    limit: int = Query(default=20, ge=1, le=100),
    _actor: ActorContext = Depends(require_admin_or_agent),
) -> SoulsDirectorySearchResponse:
    """Search the souls directory by handle/slug substring.

    Args:
        q: Search text; an empty value returns the first ``limit`` entries.
        limit: Maximum number of results (1-100).

    Raises:
        HTTPException: 502 when the directory listing cannot be loaded.
    """
    try:
        refs = await souls_directory.list_souls_directory_refs()
    except Exception as exc:
        # Report upstream failures as 502, the same mapping get_markdown uses,
        # instead of letting them surface as an unhandled 500.
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(exc)) from exc

    matches = souls_directory.search_souls(refs, query=q, limit=limit)
    items = [
        SoulsDirectorySoulRef(
            handle=ref.handle,
            slug=ref.slug,
            page_url=ref.page_url,
            raw_md_url=ref.raw_md_url,
        )
        for ref in matches
    ]
    return SoulsDirectorySearchResponse(items=items)
@router.get("/{handle}/{slug}.md", response_model=SoulsDirectoryMarkdownResponse)
@router.get("/{handle}/{slug}", response_model=SoulsDirectoryMarkdownResponse)
async def get_markdown(
    handle: str,
    slug: str,
    _actor: ActorContext = Depends(require_admin_or_agent),
) -> SoulsDirectoryMarkdownResponse:
    """Fetch one soul's markdown from the souls directory.

    Registered both with and without a ``.md`` suffix; a trailing ``.md`` on
    the slug is dropped before validation.

    Raises:
        HTTPException: 422 when handle or slug fails validation; 502 when the
            fetch raises any exception.
    """
    safe_handle = _validate_segment(handle, field="handle")
    slug_without_ext = slug.removesuffix(".md")
    safe_slug = _validate_segment(slug_without_ext, field="slug")

    try:
        markdown = await souls_directory.fetch_soul_markdown(handle=safe_handle, slug=safe_slug)
    except Exception as upstream_error:
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=str(upstream_error),
        ) from upstream_error

    return SoulsDirectoryMarkdownResponse(
        handle=safe_handle,
        slug=safe_slug,
        content=markdown,
    )

View File

@@ -20,6 +20,7 @@ from app.api.boards import router as boards_router
from app.api.gateway import router as gateway_router
from app.api.gateways import router as gateways_router
from app.api.metrics import router as metrics_router
from app.api.souls_directory import router as souls_directory_router
from app.api.tasks import router as tasks_router
from app.api.users import router as users_router
from app.core.config import settings
@@ -74,6 +75,7 @@ api_v1.include_router(activity_router)
api_v1.include_router(gateway_router)
api_v1.include_router(gateways_router)
api_v1.include_router(metrics_router)
api_v1.include_router(souls_directory_router)
api_v1.include_router(board_groups_router)
api_v1.include_router(board_group_memory_router)
api_v1.include_router(boards_router)

View File

@@ -12,6 +12,11 @@ from app.schemas.board_onboarding import (
from app.schemas.boards import BoardCreate, BoardRead, BoardUpdate
from app.schemas.gateways import GatewayCreate, GatewayRead, GatewayUpdate
from app.schemas.metrics import DashboardMetrics
from app.schemas.souls_directory import (
SoulsDirectoryMarkdownResponse,
SoulsDirectorySearchResponse,
SoulsDirectorySoulRef,
)
from app.schemas.tasks import TaskCreate, TaskRead, TaskUpdate
from app.schemas.users import UserCreate, UserRead, UserUpdate
@@ -38,6 +43,9 @@ __all__ = [
"GatewayRead",
"GatewayUpdate",
"DashboardMetrics",
"SoulsDirectoryMarkdownResponse",
"SoulsDirectorySearchResponse",
"SoulsDirectorySoulRef",
"TaskCreate",
"TaskRead",
"TaskUpdate",

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
from pydantic import BaseModel
class SoulsDirectorySoulRef(BaseModel):
    """One souls-directory entry as returned in search results."""

    # Handle segment of the soul's URL.
    handle: str
    # Slug segment of the soul's URL.
    slug: str
    # URL of the soul's page on the directory site.
    page_url: str
    # URL of the soul's raw markdown.
    raw_md_url: str
class SoulsDirectorySearchResponse(BaseModel):
    """Response body of the souls-directory search endpoint."""

    # Matching entries, at most the requested limit.
    items: list[SoulsDirectorySoulRef]
class SoulsDirectoryMarkdownResponse(BaseModel):
    """Response body carrying one soul's markdown."""

    # Validated handle the markdown was fetched for.
    handle: str
    # Validated slug the markdown was fetched for.
    slug: str
    # Markdown text returned by the directory.
    content: str

View File

@@ -0,0 +1,129 @@
from __future__ import annotations
import time
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from typing import Final
import httpx
# Root URL of the souls directory site; page and raw-markdown URLs are built from it.
SOULS_DIRECTORY_BASE_URL: Final[str] = "https://souls.directory"
# Sitemap from which the list of souls is derived.
SOULS_DIRECTORY_SITEMAP_URL: Final[str] = f"{SOULS_DIRECTORY_BASE_URL}/sitemap.xml"
# Parsed sitemap entries are reused for one hour before being fetched again.
_SITEMAP_TTL_SECONDS: Final[int] = 60 * 60
@dataclass(frozen=True, slots=True)
class SoulRef:
handle: str
slug: str
@property
def page_url(self) -> str:
return f"{SOULS_DIRECTORY_BASE_URL}/souls/{self.handle}/{self.slug}"
@property
def raw_md_url(self) -> str:
return f"{SOULS_DIRECTORY_BASE_URL}/api/souls/{self.handle}/{self.slug}.md"
def _parse_sitemap_soul_refs(sitemap_xml: str) -> list[SoulRef]:
try:
root = ET.fromstring(sitemap_xml)
except ET.ParseError:
return []
# Handle both namespaced and non-namespaced sitemap XML.
urls: list[str] = []
for loc in root.iter():
if loc.tag.endswith("loc") and loc.text:
urls.append(loc.text.strip())
refs: list[SoulRef] = []
for url in urls:
if not url.startswith(f"{SOULS_DIRECTORY_BASE_URL}/souls/"):
continue
# Expected: https://souls.directory/souls/{handle}/{slug}
parts = url.split("/")
if len(parts) < 6:
continue
handle = parts[4].strip()
slug = parts[5].strip()
if not handle or not slug:
continue
refs.append(SoulRef(handle=handle, slug=slug))
return refs
# Process-wide cache of the last parsed sitemap and when it was loaded.
_sitemap_cache: dict[str, object] = {"loaded_at": 0.0, "refs": []}


async def list_souls_directory_refs(*, client: httpx.AsyncClient | None = None) -> list[SoulRef]:
    """Return all soul references listed in the directory sitemap.

    A non-empty cached list younger than ``_SITEMAP_TTL_SECONDS`` is returned
    as-is. Otherwise the sitemap is downloaded, parsed and cached. An empty
    cached list never counts as a hit, so it is refetched on every call.

    Args:
        client: Optional HTTP client to use. When omitted, a client is
            created for this call and closed before returning.

    Raises:
        httpx.HTTPError: When the request fails or returns an error status.
    """
    now = time.time()

    stamp = _sitemap_cache.get("loaded_at")
    if not isinstance(stamp, (int, float)):
        stamp = 0.0
    entries = _sitemap_cache.get("refs")
    if entries and isinstance(entries, list) and now - stamp < _SITEMAP_TTL_SECONDS:
        return entries

    created_here = client is None
    if created_here:
        client = httpx.AsyncClient(
            timeout=httpx.Timeout(10.0, connect=5.0),
            headers={"User-Agent": "openclaw-mission-control/1.0"},
        )
    try:
        response = await client.get(SOULS_DIRECTORY_SITEMAP_URL)
        response.raise_for_status()
        fresh = _parse_sitemap_soul_refs(response.text)
        _sitemap_cache.update(loaded_at=now, refs=fresh)
        return fresh
    finally:
        # Only close a client this call created; a caller-supplied one stays open.
        if created_here:
            await client.aclose()
async def fetch_soul_markdown(
    *,
    handle: str,
    slug: str,
    client: httpx.AsyncClient | None = None,
) -> str:
    """Download the raw markdown of one soul.

    ``handle`` and ``slug`` are stripped of surrounding whitespace and
    slashes, and one trailing ``.md`` on the slug is dropped, before the URL
    ``<base>/api/souls/<handle>/<slug>.md`` is built. No other validation
    happens here, so callers must pass already-validated segments.

    Args:
        handle: Handle segment of the soul.
        slug: Slug segment, with or without a ``.md`` suffix.
        client: Optional HTTP client to use. When omitted, a client is
            created for this call and closed before returning.

    Raises:
        httpx.HTTPError: When the request fails or returns an error status.
    """
    clean_handle = handle.strip().strip("/")
    clean_slug = slug.strip().strip("/").removesuffix(".md")
    url = f"{SOULS_DIRECTORY_BASE_URL}/api/souls/{clean_handle}/{clean_slug}.md"

    created_here = client is None
    if created_here:
        client = httpx.AsyncClient(
            timeout=httpx.Timeout(15.0, connect=5.0),
            headers={"User-Agent": "openclaw-mission-control/1.0"},
        )
    try:
        response = await client.get(url)
        response.raise_for_status()
        return response.text
    finally:
        # Only close a client this call created; a caller-supplied one stays open.
        if created_here:
            await client.aclose()
def search_souls(refs: list[SoulRef], *, query: str, limit: int = 20) -> list[SoulRef]:
    """Return up to ``limit`` refs whose ``handle/slug`` contains ``query``.

    Matching is a case-insensitive substring test against
    ``"<handle>/<slug>"``, in input order. A blank query matches everything.

    Args:
        refs: Candidate references.
        query: Search text; surrounding whitespace is ignored.
        limit: Maximum number of results; zero or negative yields no results.

    Returns:
        A new list with the first ``limit`` matches.
    """
    # Without this guard a non-empty query with limit <= 0 still returned one
    # match, because the limit was only checked after appending.
    if limit <= 0:
        return []

    needle = query.strip().lower()
    if not needle:
        return refs[:limit]

    matches: list[SoulRef] = []
    for ref in refs:
        if needle in f"{ref.handle}/{ref.slug}".lower():
            matches.append(ref)
            if len(matches) >= limit:
                break
    return matches

View File

@@ -0,0 +1,29 @@
from __future__ import annotations
from app.services.souls_directory import SoulRef, _parse_sitemap_soul_refs, search_souls
def test_parse_sitemap_extracts_soul_refs() -> None:
    """Only ``/souls/<handle>/<slug>`` URLs in a namespaced sitemap become refs."""
    sitemap_xml = """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url><loc>https://souls.directory</loc></url>
<url><loc>https://souls.directory/souls/thedaviddias/code-reviewer</loc></url>
<url><loc>https://souls.directory/souls/someone/technical-writer</loc></url>
</urlset>
"""
    expected = [
        SoulRef(handle="thedaviddias", slug="code-reviewer"),
        SoulRef(handle="someone", slug="technical-writer"),
    ]
    assert _parse_sitemap_soul_refs(sitemap_xml) == expected
def test_search_souls_matches_handle_or_slug() -> None:
    """A query matches against either the handle or the slug part of a ref."""
    reviewer = SoulRef(handle="thedaviddias", slug="code-reviewer")
    writer = SoulRef(handle="thedaviddias", slug="technical-writer")
    pirate = SoulRef(handle="someone", slug="pirate-captain")
    refs = [reviewer, writer, pirate]

    by_slug = search_souls(refs, query="writer", limit=20)
    assert by_slug == [writer]

    by_handle = search_souls(refs, query="thedaviddias", limit=20)
    assert by_handle == [reviewer, writer]

View File

@@ -297,6 +297,31 @@ Body: {"depends_on_task_ids":["DEP_TASK_ID_1","DEP_TASK_ID_2"]}
9) Post a brief status update in board memory (1-3 bullets).
## Soul Inspiration (Optional)
Sometimes it's useful to improve your `SOUL.md` (or an agent's `SOUL.md`) to better match the work, constraints, and desired collaboration style.
Rules:
- Use external SOUL templates (e.g. souls.directory) as inspiration only. Do not copy-paste large sections verbatim.
- Prefer small, reversible edits. Keep `SOUL.md` stable; put fast-evolving preferences in `SELF.md`.
- When proposing a change, include:
- The source page URL(s) you looked at.
- A short summary of the principles you are borrowing.
- A minimal diff-like description of what would change.
- A rollback note (how to revert).
- Do not apply changes silently. Create a board approval first if the change is non-trivial.
Tools:
- Search souls directory:
GET $BASE_URL/api/v1/souls-directory/search?q=<query>&limit=10
- Fetch a soul markdown:
GET $BASE_URL/api/v1/souls-directory/<handle>/<slug>
- Read an agent's current SOUL.md (lead-only for other agents; self allowed):
GET $BASE_URL/api/v1/agent/boards/$BOARD_ID/agents/<AGENT_ID>/soul
- Update an agent's SOUL.md (lead-only):
PUT $BASE_URL/api/v1/agent/boards/$BOARD_ID/agents/<AGENT_ID>/soul
Body: {"content":"<new SOUL.md>","source_url":"<optional>","reason":"<optional>"}
## Memory Maintenance (every 2-3 days)
Lightweight consolidation (modeled on human "sleep consolidation"):
1) Read recent `memory/YYYY-MM-DD.md` files (since last consolidation, or last 2-3 days).