Files
ai-team-dashboard/scripts/bot-doctor.sh
fang 5f14174bb9 feat: 添加 Dashboard 完整日志监控系统 v1.1.0
 新增功能
- 完整的日志记录系统(6 种日志级别)
- 日志配置功能(可通过 config.json 控制)
- 性能监控装饰器和请求日志中间件
- 7 个管理工具脚本
- 完整的文档和使用指南

🛠️ 管理工具
- start-with-log.sh: 启动脚本(带日志)
- stop-dashboard.sh: 停止脚本
- view-logs.sh: 日志查看器
- monitor-logs.sh: 实时监控工具(支持多种过滤器)
- analyze-logs.sh: 日志分析工具(自动生成报告)
- demo-logging.sh: 功能演示脚本
- test-logging-config.sh: 配置测试工具

📊 日志特性
- 支持 INFO/SUCCESS/WARN/ERROR/DEBUG/PERF 6 种级别
- 自动记录启动过程、API 请求、性能统计
- 缓存命中情况追踪
- 分步性能监控
- 智能过滤器

⚙️ 配置功能
- 可控制是否启用日志(默认:true)
- 可设置日志级别(默认:INFO)
- 可控制文件/控制台输出
- 支持动态配置(重启生效)

📚 文档
- LOGGING_GUIDE.md: 完整使用指南
- LOGGING_CONFIG.md: 配置说明文档
- LOGGING_CONFIG_QUICK.md: 快速配置指南
- 多个中文说明文档

🔒 安全
- 添加 .gitignore 排除敏感信息
- config.json(含 Token)不提交
- 日志文件不提交
- 示例配置使用占位符

 测试
- 语法检查通过
- 功能完整性验证
- 配置控制测试通过
- 文档完整性检查

详见 CHANGELOG_v1.1.0.md

Made-with: Cursor
2026-03-11 11:37:35 +08:00

205 lines
6.1 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# bot-doctor.sh — AI Team 三 Bot 健康监控 & 自动重启
# 用法:
# ./bot-doctor.sh 一次性检查 + 修复
# ./bot-doctor.sh --watch 持续监控(每 60 秒)
# ./bot-doctor.sh --status 仅查看状态,不修复
# ./bot-doctor.sh --restart all|leader|kimi|qianwen|dashboard 手动重启
COMPOSE_DIR="/Users/fang/Desktop/ai-team"
DASHBOARD_DIR="/Users/fang/Desktop/ai-team/dashboard"
GATEWAY_URL="http://127.0.0.1:18789/healthz"
DASHBOARD_URL="http://localhost:3800"
WATCH_INTERVAL=60
LOG_FILE="/Users/fang/Desktop/ai-team/logs/bot-doctor.log"
mkdir -p "$(dirname "$LOG_FILE")"
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[0;33m'; CYAN='\033[0;36m'; NC='\033[0m'; BOLD='\033[1m'
log() { local msg="[$(date '+%Y-%m-%d %H:%M:%S')] $1"; echo "$msg" >> "$LOG_FILE"; echo -e "$msg"; }
check_leader() {
local code
code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "$GATEWAY_URL" 2>/dev/null)
[ "$code" = "200" ]
}
check_container() {
local state
state=$(docker inspect --format='{{.State.Status}}' "$1" 2>/dev/null)
[ "$state" = "running" ]
}
check_dashboard() {
local code
code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 "$DASHBOARD_URL" 2>/dev/null)
[ "$code" = "200" ]
}
restart_leader() {
log "${YELLOW}🦞 正在重启大龙虾 Gateway...${NC}"
launchctl bootout gui/$(id -u) ~/Library/LaunchAgents/ai.openclaw.gateway.plist 2>/dev/null
sleep 2
launchctl bootstrap gui/$(id -u) ~/Library/LaunchAgents/ai.openclaw.gateway.plist 2>/dev/null
sleep 3
if check_leader; then
log "${GREEN}✅ 大龙虾 Gateway 重启成功${NC}"
return 0
else
log "${RED}❌ 大龙虾 Gateway 重启失败,尝试 openclaw gateway...${NC}"
openclaw gateway &>/dev/null &
sleep 5
if check_leader; then
log "${GREEN}✅ 大龙虾 Gateway 通过 openclaw 启动成功${NC}"
return 0
fi
log "${RED}❌ 大龙虾 Gateway 启动失败!${NC}"
return 1
fi
}
restart_container() {
local name="$1" display="$2"
log "${YELLOW}🔄 正在重启 ${display} (${name})...${NC}"
docker restart "$name" 2>/dev/null
sleep 5
if check_container "$name"; then
log "${GREEN}${display} 重启成功${NC}"
return 0
else
log "${YELLOW}尝试 docker compose up...${NC}"
cd "$COMPOSE_DIR" && docker compose up -d "$name" 2>/dev/null
sleep 5
if check_container "$name"; then
log "${GREEN}${display} 通过 compose 启动成功${NC}"
return 0
fi
log "${RED}${display} 启动失败!${NC}"
return 1
fi
}
restart_dashboard() {
log "${YELLOW}📊 正在重启 Dashboard...${NC}"
lsof -ti :3800 | xargs kill 2>/dev/null
sleep 1
cd "$DASHBOARD_DIR" && nohup node server.js >> "$LOG_FILE" 2>&1 &
sleep 2
if check_dashboard; then
log "${GREEN}✅ Dashboard 重启成功${NC}"
return 0
fi
log "${RED}❌ Dashboard 启动失败!${NC}"
return 1
}
print_status() {
echo ""
echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BOLD} 🏥 AI Team Bot Doctor $(date '+%H:%M:%S')${NC}"
echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""
if check_leader; then
echo -e " 🦞 大龙虾 (Gateway) ${GREEN}● 运行中${NC}"
else
echo -e " 🦞 大龙虾 (Gateway) ${RED}✕ 已停止${NC}"
fi
if check_container "ai-team-kimi"; then
echo -e " 🔬 智囊团 (kimi) ${GREEN}● 运行中${NC}"
else
echo -e " 🔬 智囊团 (kimi) ${RED}✕ 已停止${NC}"
fi
if check_container "ai-team-qianwen"; then
echo -e " ⚡ 全栈高手 (qianwen) ${GREEN}● 运行中${NC}"
else
echo -e " ⚡ 全栈高手 (qianwen) ${RED}✕ 已停止${NC}"
fi
if check_dashboard; then
echo -e " 📊 Dashboard (:3800) ${GREEN}● 运行中${NC}"
else
echo -e " 📊 Dashboard (:3800) ${YELLOW}✕ 未运行${NC}"
fi
echo ""
}
do_heal() {
local fixed=0
if ! check_leader; then
restart_leader && fixed=$((fixed+1))
fi
if ! check_container "ai-team-kimi"; then
restart_container "ai-team-kimi" "智囊团" && fixed=$((fixed+1))
fi
if ! check_container "ai-team-qianwen"; then
restart_container "ai-team-qianwen" "全栈高手" && fixed=$((fixed+1))
fi
if ! check_dashboard; then
restart_dashboard && fixed=$((fixed+1))
fi
if [ $fixed -eq 0 ]; then
log "${GREEN}✅ 所有 Bot 运行正常,无需修复${NC}"
else
log "${CYAN}🔧 本次修复了 ${fixed} 个服务${NC}"
fi
}
case "${1:-}" in
--status|-s)
print_status
;;
--watch|-w)
log "🏥 Bot Doctor 持续监控模式启动(间隔 ${WATCH_INTERVAL}s"
while true; do
print_status
do_heal
echo -e "\n${CYAN} 下次检查: ${WATCH_INTERVAL}s 后 (Ctrl+C 退出)${NC}\n"
sleep "$WATCH_INTERVAL"
done
;;
--restart|-r)
target="${2:-all}"
case "$target" in
all)
restart_leader; restart_container "ai-team-kimi" "智囊团"; restart_container "ai-team-qianwen" "全栈高手"; restart_dashboard ;;
leader|dalongxia)
restart_leader ;;
kimi)
restart_container "ai-team-kimi" "智囊团" ;;
qianwen)
restart_container "ai-team-qianwen" "全栈高手" ;;
dashboard)
restart_dashboard ;;
*)
echo "用法: $0 --restart [all|leader|kimi|qianwen|dashboard]"; exit 1 ;;
esac
print_status
;;
--help|-h)
echo "🏥 AI Team Bot Doctor"
echo ""
echo "用法:"
echo " $0 一次性检查 + 自动修复"
echo " $0 --status 仅查看状态"
echo " $0 --watch 持续监控(每 ${WATCH_INTERVAL}s"
echo " $0 --restart all 手动重启所有"
echo " $0 --restart kimi 手动重启指定 bot"
echo ""
echo "支持的 bot: leader, kimi, qianwen, dashboard, all"
;;
*)
print_status
do_heal
;;
esac