Files
openclaw-mission-control/scripts/check_markdown_links.py

104 lines
2.7 KiB
Python
Executable File

#!/usr/bin/env python3
"""Lightweight markdown link checker for repo docs.
Checks *relative* links inside markdown files and fails CI if any targets are missing.
Design goals:
- No external deps.
- Ignore http(s)/mailto links.
- Ignore pure anchors (#foo).
- Support links with anchors (./path.md#section) by checking only the path part.
Limitations:
- Does not validate that anchors exist inside target files.
- Does not validate links generated dynamically or via HTML.
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
LINK_RE = re.compile(r"\[[^\]]+\]\(([^)]+)\)")
def iter_md_files(root: Path) -> list[Path]:
"""Return markdown files to check.
Policy:
- Always check root contributor-facing markdown (`README.md`, `CONTRIBUTING.md`).
- If `docs/` exists, also check `docs/**/*.md`.
Rationale:
- We want fast feedback on broken *relative* links in the most important entrypoints.
- We intentionally do **not** crawl external URLs.
"""
files: list[Path] = []
for p in (root / "README.md", root / "CONTRIBUTING.md"):
if p.exists():
files.append(p)
docs = root / "docs"
if docs.exists():
files.extend(sorted(docs.rglob("*.md")))
# de-dupe + stable order
return sorted({p.resolve() for p in files})
def normalize_target(raw: str) -> str | None:
raw = raw.strip()
if not raw:
return None
if raw.startswith("http://") or raw.startswith("https://") or raw.startswith("mailto:"):
return None
if raw.startswith("#"):
return None
# strip query/fragment
raw = raw.split("#", 1)[0].split("?", 1)[0]
if not raw:
return None
return raw
def main() -> int:
root = Path(__file__).resolve().parents[1]
md_files = iter_md_files(root)
missing: list[tuple[Path, str]] = []
for md in md_files:
text = md.read_text(encoding="utf-8")
for m in LINK_RE.finditer(text):
target_raw = m.group(1)
target = normalize_target(target_raw)
if target is None:
continue
# Skip common markdown reference-style quirks.
if target.startswith("<") and target.endswith(">"):
continue
# Resolve relative to current file.
resolved = (md.parent / target).resolve()
if not resolved.exists():
missing.append((md, target_raw))
if missing:
print("Broken relative links detected:\n")
for md, target in missing:
print(f"- {md.relative_to(root)} -> {target}")
print(f"\nTotal: {len(missing)}")
return 1
print(f"OK: checked {len(md_files)} markdown files")
return 0
if __name__ == "__main__":
raise SystemExit(main())