Files
openclaw-mission-control/scripts/check_markdown_links.py

95 lines
2.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""Lightweight markdown link checker for repo docs.
Checks *relative* links inside markdown files and fails CI if any targets are missing.
Design goals:
- No external deps.
- Ignore http(s)/mailto links.
- Ignore pure anchors (#foo).
- Support links with anchors (./path.md#section) by checking only the path part.
Limitations:
- Does not validate that anchors exist inside target files.
- Does not validate links generated dynamically or via HTML.
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
# Inline markdown link: `[text](target)`. Group 1 captures everything between
# the parentheses (the raw target, including any anchor or title).
LINK_RE = re.compile(
    r"""
    \[ [^\]]+ \]     # non-empty link text inside square brackets
    \( ( [^)]+ ) \)  # non-empty target inside parentheses, captured as group 1
    """,
    re.VERBOSE,
)
def iter_md_files(root: Path) -> list[Path]:
    """Return the markdown files to check, sorted by path.

    Policy (initial): check only `docs/**/*.md`.

    Rationale:
    - Root `README.md` / `CONTRIBUTING.md` may temporarily contain legacy links
      during docs re-org. Once docs + README are stabilized, we can expand this
      to include root markdown files.

    Args:
        root: Directory that contains the `docs` folder.

    Returns:
        Sorted list of regular `*.md` files found recursively under
        `root / "docs"`; an empty list when that directory does not exist.
    """
    docs = root / "docs"
    if not docs.is_dir():
        return []
    # rglob() matches directories as well; a directory named `foo.md` cannot be
    # read as text, so keep regular files only.
    return sorted(path for path in docs.rglob("*.md") if path.is_file())
# Optional markdown link title after the destination:
# `path.md "Title"`, `path.md 'Title'` or `<path with spaces.md> "Title"`.
_TITLE_SUFFIX_RE = re.compile(r"""^(<[^<>]*>|\S+)\s+(?:"[^"]*"|'[^']*')$""")

# URI scheme prefix (`http:`, `mailto:`, `ftp:`, `tel:` ...). At least two
# characters are required so a single drive letter (`C:`) is not mistaken for one.
_URI_SCHEME_RE = re.compile(r"^[A-Za-z][A-Za-z0-9+.\-]+:")


def normalize_target(raw: str) -> str | None:
    """Reduce a raw markdown link target to the filesystem path to check.

    Args:
        raw: Text found between the parentheses of a `[text](target)` link.

    Returns:
        The path part of the target with any link title, `#fragment` and
        `?query` removed, or `None` when there is nothing to check on disk:
        empty targets, pure anchors (`#foo`), targets with a URI scheme
        (matched case-insensitively) and protocol-relative URLs (`//host/...`).
    """
    raw = raw.strip()

    # Drop a trailing link title so `guide.md "Guide"` is checked as `guide.md`.
    title_match = _TITLE_SUFFIX_RE.match(raw)
    if title_match:
        raw = title_match.group(1)

    if not raw or raw.startswith("#"):
        return None
    # External resources are out of scope for a relative-link checker.
    if raw.startswith("//") or _URI_SCHEME_RE.match(raw):
        return None

    # strip query/fragment
    path = raw.split("#", 1)[0].split("?", 1)[0]
    return path or None
def main() -> int:
    """Check relative links in the repo's markdown docs and report broken ones.

    The repository root is taken to be the parent of the directory containing
    this script. Every link found by `LINK_RE` in the files returned by
    `iter_md_files` is normalized with `normalize_target` and checked for
    existence on disk.

    Returns:
        1 if at least one link target is missing (after printing a report),
        otherwise 0.
    """
    root = Path(__file__).resolve().parents[1]
    md_files = iter_md_files(root)
    missing: list[tuple[Path, str]] = []

    for md in md_files:
        text = md.read_text(encoding="utf-8")
        for match in LINK_RE.finditer(text):
            target_raw = match.group(1)
            target = normalize_target(target_raw)
            if target is None:
                continue
            # Skip common markdown reference-style quirks.
            if target.startswith("<") and target.endswith(">"):
                continue
            if target.startswith("/"):
                # A leading slash means "relative to the repository root", not
                # the filesystem root; joining it onto md.parent would discard
                # the base path entirely and always report the link as missing.
                resolved = (root / target.lstrip("/")).resolve()
            else:
                # Resolve relative to current file.
                resolved = (md.parent / target).resolve()
            if not resolved.exists():
                # Report the raw target so it can be searched for in the file.
                missing.append((md, target_raw))

    if missing:
        print("Broken relative links detected:\n")
        for md, target in missing:
            print(f"- {md.relative_to(root)} -> {target}")
        print(f"\nTotal: {len(missing)}")
        return 1

    print(f"OK: checked {len(md_files)} markdown files")
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())