feat: 添加 Dashboard 完整日志监控系统 v1.1.0
✨ 新增功能 - 完整的日志记录系统(6 种日志级别) - 日志配置功能(可通过 config.json 控制) - 性能监控装饰器和请求日志中间件 - 7 个管理工具脚本 - 完整的文档和使用指南 🛠️ 管理工具 - start-with-log.sh: 启动脚本(带日志) - stop-dashboard.sh: 停止脚本 - view-logs.sh: 日志查看器 - monitor-logs.sh: 实时监控工具(支持多种过滤器) - analyze-logs.sh: 日志分析工具(自动生成报告) - demo-logging.sh: 功能演示脚本 - test-logging-config.sh: 配置测试工具 📊 日志特性 - 支持 INFO/SUCCESS/WARN/ERROR/DEBUG/PERF 6 种级别 - 自动记录启动过程、API 请求、性能统计 - 缓存命中情况追踪 - 分步性能监控 - 智能过滤器 ⚙️ 配置功能 - 可控制是否启用日志(默认:true) - 可设置日志级别(默认:INFO) - 可控制文件/控制台输出 - 支持动态配置(重启生效) 📚 文档 - LOGGING_GUIDE.md: 完整使用指南 - LOGGING_CONFIG.md: 配置说明文档 - LOGGING_CONFIG_QUICK.md: 快速配置指南 - 多个中文说明文档 🔒 安全 - 添加 .gitignore 排除敏感信息 - config.json(含 Token)不提交 - 日志文件不提交 - 示例配置使用占位符 ✅ 测试 - 语法检查通过 - 功能完整性验证 - 配置控制测试通过 - 文档完整性检查 详见 CHANGELOG_v1.1.0.md Made-with: Cursor
This commit is contained in:
204
scripts/bot-doctor.sh
Executable file
204
scripts/bot-doctor.sh
Executable file
@@ -0,0 +1,204 @@
|
||||
#!/bin/bash
# bot-doctor.sh — health monitoring & automatic restart for the three AI Team bots
# Usage:
#   ./bot-doctor.sh             one-shot check + repair
#   ./bot-doctor.sh --watch     continuous monitoring (every 60 seconds by default)
#   ./bot-doctor.sh --status    show status only, no repair
#   ./bot-doctor.sh --restart all|leader|kimi|qianwen|dashboard   manual restart
#
# Every setting below may be overridden from the environment; the defaults
# are the values this script originally hard-coded.

# Directory that holds the docker compose project.
COMPOSE_DIR="${COMPOSE_DIR:-/Users/fang/Desktop/ai-team}"
# Directory containing the dashboard's server.js.
DASHBOARD_DIR="${DASHBOARD_DIR:-${COMPOSE_DIR}/dashboard}"
# Health endpoint polled to decide whether the gateway is up.
GATEWAY_URL="${GATEWAY_URL:-http://127.0.0.1:18789/healthz}"
# URL polled to decide whether the dashboard is up.
DASHBOARD_URL="${DASHBOARD_URL:-http://localhost:3800}"
# Seconds between two checks in --watch mode.
WATCH_INTERVAL="${WATCH_INTERVAL:-60}"
# File that receives every log() line and the dashboard's own output.
LOG_FILE="${LOG_FILE:-${COMPOSE_DIR}/logs/bot-doctor.log}"

# Make sure the log directory exists before the first log() call appends to it.
mkdir -p "$(dirname "$LOG_FILE")"

# Color codes are kept as literal backslash sequences; they only become real
# escape characters when printed through `echo -e`.
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[0;33m'; CYAN='\033[0;36m'; NC='\033[0m'; BOLD='\033[1m'

# Record a timestamped message in the log file and echo it to the terminal.
# Globals:   LOG_FILE (appended to)
# Arguments: $1 - message text; may embed literal \033[...m color codes
# Outputs:   the timestamped, colorized line on stdout
log() {
  local msg
  msg="[$(date '+%Y-%m-%d %H:%M:%S')] ${1-}"
  # The color codes are literal backslash sequences that only `echo -e`
  # turns into real escapes; strip them so the file stays plain text.
  printf '%s\n' "$msg" | sed 's/\\033\[[0-9;]*m//g' >> "$LOG_FILE"
  echo -e "$msg"
}

# Probe the gateway health endpoint.
# Globals: GATEWAY_URL (read)
# Returns: 0 when the endpoint answers HTTP 200, non-zero otherwise
check_leader() {
  local code
  # --connect-timeout only bounds the connection phase; --max-time also bounds
  # a gateway that accepts the connection but never answers, so this check
  # cannot block the caller indefinitely.
  code=$(curl -s -o /dev/null -w "%{http_code}" \
    --connect-timeout 5 --max-time 10 "$GATEWAY_URL" 2>/dev/null)
  [ "$code" = "200" ]
}

# Tell whether a Docker container is currently running.
# Arguments: $1 - container name passed to `docker inspect`
# Returns:   0 when the reported state is exactly "running", non-zero otherwise
#            (including when the inspect call itself fails and prints nothing)
check_container() {
  local container="$1"
  local state
  # inspect errors (e.g. unknown container) are silenced; they simply leave
  # $state empty, which is reported as "not running".
  state=$(docker inspect --format='{{.State.Status}}' "$container" 2>/dev/null)
  [ "$state" = "running" ]
}

# Probe the dashboard over HTTP.
# Globals: DASHBOARD_URL (read)
# Returns: 0 when the dashboard answers HTTP 200, non-zero otherwise
check_dashboard() {
  local code
  # --max-time bounds the whole request so a dashboard that accepts the
  # connection but never responds cannot stall the caller.
  code=$(curl -s -o /dev/null -w "%{http_code}" \
    --connect-timeout 3 --max-time 10 "$DASHBOARD_URL" 2>/dev/null)
  [ "$code" = "200" ]
}

# Restart the gateway: first through its launchd agent, then, if the health
# check still fails, by launching `openclaw gateway` directly.
# Globals: YELLOW, GREEN, RED, NC (read)
# Returns: 0 when check_leader succeeds after either attempt, 1 otherwise
restart_leader() {
  local domain plist
  domain="gui/$(id -u)"
  plist=~/Library/LaunchAgents/ai.openclaw.gateway.plist

  log "${YELLOW}🦞 正在重启大龙虾 Gateway...${NC}"
  # launchctl errors are silenced on purpose: the outcome is judged solely by
  # the health check below.
  launchctl bootout "$domain" "$plist" 2>/dev/null
  sleep 2
  launchctl bootstrap "$domain" "$plist" 2>/dev/null
  sleep 3
  if check_leader; then
    log "${GREEN}✅ 大龙虾 Gateway 重启成功${NC}"
    return 0
  fi

  log "${RED}❌ 大龙虾 Gateway 重启失败,尝试 openclaw gateway...${NC}"
  # nohup keeps the fallback gateway alive if this script's terminal goes
  # away, matching how the dashboard process is launched.
  nohup openclaw gateway >/dev/null 2>&1 &
  sleep 5
  if check_leader; then
    log "${GREEN}✅ 大龙虾 Gateway 通过 openclaw 启动成功${NC}"
    return 0
  fi

  log "${RED}❌ 大龙虾 Gateway 启动失败!${NC}"
  return 1
}

# Restart a bot container, falling back to `docker compose up -d` when a
# plain `docker restart` does not bring it back.
# Globals:   COMPOSE_DIR (read), YELLOW, GREEN, RED, NC (read)
# Arguments: $1 - container name
#            $2 - display name used in log messages
# Returns:   0 when check_container succeeds after either attempt, 1 otherwise
restart_container() {
  local name="$1" display="$2"

  log "${YELLOW}🔄 正在重启 ${display} (${name})...${NC}"
  docker restart "$name" 2>/dev/null
  sleep 5
  if check_container "$name"; then
    log "${GREEN}✅ ${display} 重启成功${NC}"
    return 0
  fi

  log "${YELLOW}尝试 docker compose up...${NC}"
  # Run in a subshell so the directory change needed by compose does not leak
  # into the rest of the script.
  ( cd "$COMPOSE_DIR" && docker compose up -d "$name" 2>/dev/null )
  sleep 5
  if check_container "$name"; then
    log "${GREEN}✅ ${display} 通过 compose 启动成功${NC}"
    return 0
  fi

  log "${RED}❌ ${display} 启动失败!${NC}"
  return 1
}

# Stop whatever is serving the dashboard port and start server.js again.
# Globals: DASHBOARD_DIR (read), LOG_FILE (appended to by the node process),
#          DASHBOARD_PORT (optional override, defaults to 3800),
#          YELLOW, GREEN, RED, NC (read)
# Returns: 0 when check_dashboard succeeds after the restart, 1 otherwise
restart_dashboard() {
  local port="${DASHBOARD_PORT:-3800}"

  log "${YELLOW}📊 正在重启 Dashboard...${NC}"
  # Restrict the match to the process LISTENing on the port. A bare
  # `lsof -i :PORT` also lists clients with an open connection to it
  # (a browser tab, for instance), and those would be killed as well.
  lsof -ti "tcp:${port}" -sTCP:LISTEN 2>/dev/null | xargs kill 2>/dev/null
  sleep 1
  # The whole `cd && nohup …` list is backgrounded, so the directory change
  # happens in a child shell and does not affect this script.
  cd "$DASHBOARD_DIR" && nohup node server.js >> "$LOG_FILE" 2>&1 &
  sleep 2
  if check_dashboard; then
    log "${GREEN}✅ Dashboard 重启成功${NC}"
    return 0
  fi

  log "${RED}❌ Dashboard 启动失败!${NC}"
  return 1
}

# Print a one-line health summary for the gateway, both bot containers and
# the dashboard.
# Globals: BOLD, GREEN, RED, YELLOW, NC (read)
# Outputs: the status table on stdout; nothing is written to the log file
print_status() {
  local rule="${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  local up="${GREEN}● 运行中${NC}"
  local stopped="${RED}✕ 已停止${NC}"
  # The dashboard is reported in yellow with its own wording when it is down.
  local not_running="${YELLOW}✕ 未运行${NC}"
  local state

  echo ""
  echo -e "$rule"
  echo -e "${BOLD} 🏥 AI Team Bot Doctor $(date '+%H:%M:%S')${NC}"
  echo -e "$rule"
  echo ""

  # Each service starts from its "down" text and is switched to "up" only
  # when its health check passes.
  state=$stopped
  if check_leader; then state=$up; fi
  echo -e " 🦞 大龙虾 (Gateway) ${state}"

  state=$stopped
  if check_container "ai-team-kimi"; then state=$up; fi
  echo -e " 🔬 智囊团 (kimi) ${state}"

  state=$stopped
  if check_container "ai-team-qianwen"; then state=$up; fi
  echo -e " ⚡ 全栈高手 (qianwen) ${state}"

  state=$not_running
  if check_dashboard; then state=$up; fi
  echo -e " 📊 Dashboard (:3800) ${state}"

  echo ""
}

# Check every service and restart the ones that are down, then log a summary.
# Globals: GREEN, CYAN, RED, NC (read)
# Outputs: summary lines through log()
do_heal() {
  local fixed=0 failed=0

  # Count failed repairs separately from successful ones: with a single
  # "fixed" counter, a service that was down and could not be restarted left
  # the counter at 0 and was reported as "everything is healthy".
  if ! check_leader; then
    if restart_leader; then fixed=$((fixed + 1)); else failed=$((failed + 1)); fi
  fi

  if ! check_container "ai-team-kimi"; then
    if restart_container "ai-team-kimi" "智囊团"; then
      fixed=$((fixed + 1))
    else
      failed=$((failed + 1))
    fi
  fi

  if ! check_container "ai-team-qianwen"; then
    if restart_container "ai-team-qianwen" "全栈高手"; then
      fixed=$((fixed + 1))
    else
      failed=$((failed + 1))
    fi
  fi

  if ! check_dashboard; then
    if restart_dashboard; then fixed=$((fixed + 1)); else failed=$((failed + 1)); fi
  fi

  if [ "$fixed" -eq 0 ] && [ "$failed" -eq 0 ]; then
    log "${GREEN}✅ 所有 Bot 运行正常,无需修复${NC}"
    return 0
  fi

  if [ "$fixed" -gt 0 ]; then
    log "${CYAN}🔧 本次修复了 ${fixed} 个服务${NC}"
  fi
  if [ "$failed" -gt 0 ]; then
    log "${RED}❌ ${failed} 个服务修复失败,请手动检查${NC}"
  fi
  return 0
}

# Print the command-line help text on stdout.
usage() {
  echo "🏥 AI Team Bot Doctor"
  echo ""
  echo "用法:"
  echo " $0 一次性检查 + 自动修复"
  echo " $0 --status 仅查看状态"
  echo " $0 --watch 持续监控(每 ${WATCH_INTERVAL}s)"
  echo " $0 --restart all 手动重启所有"
  echo " $0 --restart kimi 手动重启指定 bot"
  echo ""
  echo "支持的 bot: leader, kimi, qianwen, dashboard, all"
}

# Command-line entry point.
# Arguments: $1 - mode flag (--status, --watch, --restart, --help, or empty)
#            $2 - restart target when $1 is --restart (defaults to "all")
main() {
  local target

  case "${1:-}" in
    --status|-s)
      print_status
      ;;
    --watch|-w)
      log "🏥 Bot Doctor 持续监控模式启动(间隔 ${WATCH_INTERVAL}s)"
      while true; do
        print_status
        do_heal
        echo -e "\n${CYAN} 下次检查: ${WATCH_INTERVAL}s 后 (Ctrl+C 退出)${NC}\n"
        sleep "$WATCH_INTERVAL"
      done
      ;;
    --restart|-r)
      target="${2:-all}"
      case "$target" in
        all)
          restart_leader
          restart_container "ai-team-kimi" "智囊团"
          restart_container "ai-team-qianwen" "全栈高手"
          restart_dashboard
          ;;
        leader|dalongxia)
          restart_leader
          ;;
        kimi)
          restart_container "ai-team-kimi" "智囊团"
          ;;
        qianwen)
          restart_container "ai-team-qianwen" "全栈高手"
          ;;
        dashboard)
          restart_dashboard
          ;;
        *)
          echo "用法: $0 --restart [all|leader|kimi|qianwen|dashboard]" >&2
          exit 1
          ;;
      esac
      print_status
      ;;
    --help|-h)
      usage
      ;;
    "")
      # No argument: one-shot check followed by repair.
      print_status
      do_heal
      ;;
    *)
      # A mistyped flag must not silently fall through to a repair run that
      # restarts services; reject it and show the help text instead.
      echo "未知参数: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
}

main "$@"
Reference in New Issue
Block a user