#!/usr/bin/env node
/**
* 网页内容抓取 — 将网页 HTML 转换为可读文本
* 支持提取正文、标题、链接等关键信息
*/
/**
 * Print the command-line help text to stderr, one line per entry,
 * then terminate the process with exit status 2.
 */
function usage() {
  const helpLines = [
    'Usage: fetch.mjs "URL" [--raw] [--max 5000]',
    " --raw Output raw HTML instead of extracted text",
    " --max Maximum output characters (default: 5000)",
  ];
  for (const helpLine of helpLines) {
    console.error(helpLine);
  }
  process.exit(2);
}
// Command-line arguments with the node binary and script path removed.
const args = process.argv.slice(2);
// No arguments, or an explicit help flag in first position: show usage and exit.
if (args.length === 0 || args[0] === "-h" || args[0] === "--help") usage();

// The first argument is taken as the URL; option flags are read from the rest.
const url = args[0];
let raw = false; // set by --raw
let maxChars = 5000; // overridden by --max N
for (let i = 1; i < args.length; i++) {
  if (args[i] === "--raw") {
    raw = true;
  } else if (args[i] === "--max") {
    // A missing (or empty) value after --max falls back to the default of 5000.
    const parsedMax = Number.parseInt(args[++i] || "5000", 10);
    // parseInt returns NaN for non-numeric text; reject that and negative
    // limits up front instead of letting them flow into later processing.
    if (Number.isNaN(parsedMax) || parsedMax < 0) usage();
    maxChars = parsedMax;
  }
  // Any other argument is ignored.
}
function htmlToText(html) {
let text = html;
text = text.replace(/