Files
openclaw-mission-control/backend/app/services/souls_directory.py

130 lines
3.7 KiB
Python
Raw Normal View History

from __future__ import annotations
import time
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from typing import Final
import httpx
# Root of the souls.directory site; every URL built in this module is derived from it.
SOULS_DIRECTORY_BASE_URL: Final[str] = "https://souls.directory"
# Sitemap document fetched by list_souls_directory_refs() to discover soul pages.
SOULS_DIRECTORY_SITEMAP_URL: Final[str] = f"{SOULS_DIRECTORY_BASE_URL}/sitemap.xml"
# Maximum age, in seconds, of a cached sitemap result before it is refetched (one hour).
_SITEMAP_TTL_SECONDS: Final[int] = 60 * 60
@dataclass(frozen=True, slots=True)
class SoulRef:
handle: str
slug: str
@property
def page_url(self) -> str:
return f"{SOULS_DIRECTORY_BASE_URL}/souls/{self.handle}/{self.slug}"
@property
def raw_md_url(self) -> str:
return f"{SOULS_DIRECTORY_BASE_URL}/api/souls/{self.handle}/{self.slug}.md"
def _parse_sitemap_soul_refs(sitemap_xml: str) -> list[SoulRef]:
    """Extract soul references from sitemap XML text.

    Args:
        sitemap_xml: The sitemap document as a string.

    Returns:
        One ``SoulRef`` per ``loc`` URL that starts with the ``/souls/``
        prefix and carries a non-empty handle and slug, in document order.
        An empty list is returned when the XML cannot be parsed.
    """
    try:
        document = ET.fromstring(sitemap_xml)
    except ET.ParseError:
        return []

    # Matching on the tag suffix accepts both "loc" and "{namespace}loc".
    locations: list[str] = [
        element.text.strip()
        for element in document.iter()
        if element.tag.endswith("loc") and element.text
    ]

    soul_prefix = f"{SOULS_DIRECTORY_BASE_URL}/souls/"
    found: list[SoulRef] = []
    for location in locations:
        if not location.startswith(soul_prefix):
            continue
        # Expected: https://souls.directory/souls/{handle}/{slug}
        # Splitting on "/" yields ["https:", "", host, "souls", handle, slug, ...].
        segments = location.split("/")
        if len(segments) < 6:
            continue
        handle, slug = segments[4].strip(), segments[5].strip()
        if handle and slug:
            found.append(SoulRef(handle=handle, slug=slug))
    return found
# Module-level cache of the most recent sitemap result, read and written by
# list_souls_directory_refs().
#   "loaded_at": time.time() value recorded when "refs" was stored (initially 0.0).
#   "refs": the list of SoulRef objects parsed from that sitemap (initially empty).
# Values are typed as `object`, so readers narrow them with isinstance() checks.
_sitemap_cache: dict[str, object] = {
"loaded_at": 0.0,
"refs": [],
}
async def list_souls_directory_refs(*, client: httpx.AsyncClient | None = None) -> list[SoulRef]:
    """Return the souls listed in the souls.directory sitemap.

    A non-empty result is kept in the module-level ``_sitemap_cache`` and
    reused while it is younger than ``_SITEMAP_TTL_SECONDS``. An empty result
    is never served from the cache, so the sitemap is requested again on the
    next call.

    Args:
        client: Optional HTTP client to send the request with. When omitted,
            a temporary client is created and closed before returning; a
            caller-supplied client is left open.

    Returns:
        A new list on every call, so callers may sort or mutate it without
        altering the cached data.

    Raises:
        httpx.HTTPStatusError: If the sitemap response has an error status.
            Other exceptions raised by the request propagate unchanged.
    """
    now = time.time()
    loaded_raw = _sitemap_cache.get("loaded_at")
    loaded_at = loaded_raw if isinstance(loaded_raw, (int, float)) else 0.0
    cached = _sitemap_cache.get("refs")
    if cached and isinstance(cached, list) and now - loaded_at < _SITEMAP_TTL_SECONDS:
        # Hand out a copy: returning the cached list itself would let any
        # caller-side mutation leak into every later cache hit.
        return list(cached)

    owns_client = client is None
    if client is None:
        client = httpx.AsyncClient(
            timeout=httpx.Timeout(10.0, connect=5.0),
            headers={"User-Agent": "openclaw-mission-control/1.0"},
        )
    try:
        resp = await client.get(SOULS_DIRECTORY_SITEMAP_URL)
        resp.raise_for_status()
        refs = _parse_sitemap_soul_refs(resp.text)
        # Timestamp with the pre-request clock reading so the TTL counts from
        # when the call started rather than when the response arrived.
        _sitemap_cache["loaded_at"] = now
        _sitemap_cache["refs"] = refs
        # Copy here too, so the stored list stays private to the cache.
        return list(refs)
    finally:
        # Only close clients created here; borrowed ones belong to the caller.
        if owns_client:
            await client.aclose()
async def fetch_soul_markdown(
    *,
    handle: str,
    slug: str,
    client: httpx.AsyncClient | None = None,
) -> str:
    """Download the markdown document for a single soul.

    Args:
        handle: Soul owner handle; surrounding whitespace and slashes are
            stripped.
        slug: Soul slug; surrounding whitespace and slashes are stripped and
            one trailing ``.md`` is dropped before the URL is built.
        client: Optional HTTP client to send the request with. When omitted,
            a temporary client is created and closed before returning; a
            caller-supplied client is left open.

    Returns:
        The response body as text.

    Raises:
        httpx.HTTPStatusError: If the response has an error status. Other
            exceptions raised by the request propagate unchanged.
    """
    clean_handle = handle.strip().strip("/")
    # The ".md" suffix is re-added below, so accept slugs given with or without it.
    clean_slug = slug.strip().strip("/").removesuffix(".md")
    target = f"{SOULS_DIRECTORY_BASE_URL}/api/souls/{clean_handle}/{clean_slug}.md"

    borrowed = client is not None
    session = (
        client
        if client is not None
        else httpx.AsyncClient(
            timeout=httpx.Timeout(15.0, connect=5.0),
            headers={"User-Agent": "openclaw-mission-control/1.0"},
        )
    )
    try:
        response = await session.get(target)
        response.raise_for_status()
        body = response.text
    finally:
        # Only close clients created here; borrowed ones belong to the caller.
        if not borrowed:
            await session.aclose()
    return body
def search_souls(refs: list[SoulRef], *, query: str, limit: int = 20) -> list[SoulRef]:
    """Return up to ``limit`` refs whose ``handle/slug`` contains ``query``.

    Matching is a case-insensitive substring test against the string
    ``"{handle}/{slug}"``. A blank query matches every ref.

    Args:
        refs: Candidate refs, searched in order.
        query: Search text; surrounding whitespace is ignored.
        limit: Maximum number of results. Zero or a negative value yields an
            empty list.

    Returns:
        A new list of matching refs in their original order.
    """
    capacity = max(0, limit)
    if capacity == 0:
        # Guard up front: the loop below appends before it checks the cap, so
        # without this a non-positive limit would still yield one match.
        return []

    needle = query.strip().lower()
    if not needle:
        return refs[:capacity]

    matches: list[SoulRef] = []
    for ref in refs:
        if needle in f"{ref.handle}/{ref.slug}".lower():
            matches.append(ref)
            if len(matches) >= capacity:
                break
    return matches