monitoring.sh
· 17 KiB · Bash
Ham
#!/bin/bash
# ANSI colour sequences for the installer's terminal output. They are kept as
# literal backslash sequences and only turned into escape codes when printed
# (printf %b expands them the same way `echo -e` does).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # reset / no colour

# Start-up banner; the trailing \n on the last line leaves one blank line.
banner_rule="${GREEN}================================${NC}"
printf '%b\n' "${banner_rule}"
printf '%b\n' "${GREEN}Proxmox LXC Monitoring Stack${NC}"
printf '%b\n' "${banner_rule}\n"
# Root is required: the installer creates users, writes under /etc and
# /usr/local/bin, and manages systemd units.
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}Този скрипт трябва да се изпълни като root!${NC}"
  exit 1
fi

# Ask for the Telegram credentials; both are written verbatim into the
# Alertmanager configuration further down in this script.
echo -e "${YELLOW}Моля въведете Telegram Bot Token:${NC}"
read -r TELEGRAM_TOKEN
echo -e "${YELLOW}Моля въведете Telegram Chat ID (БЕЗ кавички):${NC}"
read -r TELEGRAM_CHAT_ID

# Both values are mandatory.
if [[ -z "$TELEGRAM_TOKEN" || -z "$TELEGRAM_CHAT_ID" ]]; then
  echo -e "${RED}Telegram данните са задължителни!${NC}"
  exit 1
fi

# The values end up inside a YAML document, so reject anything that would
# corrupt it. A bot token has the shape <digits>:<secret>; the chat id must be
# an integer (negative for groups/channels).
if [[ ! "$TELEGRAM_TOKEN" =~ ^[0-9]+:[A-Za-z0-9_-]+$ ]]; then
  echo -e "${RED}Невалиден Telegram Bot Token (очакван формат: 123456789:ABC...)!${NC}"
  exit 1
fi
if [[ ! "$TELEGRAM_CHAT_ID" =~ ^-?[0-9]+$ ]]; then
  echo -e "${RED}Невалиден Telegram Chat ID (трябва да е цяло число, напр. 123456789 или -1001234567890)!${NC}"
  exit 1
fi

echo -e "\n${GREEN}Започва инсталацията в LXC контейнера...${NC}\n"
# Refresh the package index and install the tools used by the rest of the
# installer (wget/tar for the release archives, curl, net-tools).
echo -e "${YELLOW}[1/9] Актуализация на системата...${NC}"
apt-get update -qq
# Only stdout is silenced so that apt's own error text stays visible; a failed
# install aborts here instead of surfacing later as a missing wget/tar.
if ! DEBIAN_FRONTEND=noninteractive apt-get install -y wget curl tar net-tools > /dev/null; then
  echo -e "${RED}Неуспешна инсталация на базовите пакети (wget, curl, tar, net-tools)!${NC}" >&2
  exit 1
fi
# Create one home-less account with /bin/false as shell for each daemon.
# useradd's stderr is discarded and its exit status ignored on purpose, so a
# failure (for example an account that already exists) does not stop the run.
echo -e "${YELLOW}[2/9] Създаване на системни потребители...${NC}"
for svc_account in prometheus node_exporter alertmanager; do
  useradd --no-create-home --shell /bin/false "$svc_account" 2>/dev/null || true
done
# Install the Prometheus release binaries (prometheus, promtool) and create
# the configuration and data directories.
echo -e "${YELLOW}[3/9] Инсталация на Prometheus...${NC}"
PROM_VERSION="2.47.0"
prom_pkg="prometheus-${PROM_VERSION}.linux-amd64"

# Work in a private temporary directory instead of the caller's current
# directory, so nothing unrelated can be clobbered or removed by cleanup.
prom_tmp=$(mktemp -d) || {
  echo -e "${RED}Неуспешно създаване на временна директория!${NC}" >&2
  exit 1
}

# Abort on a failed download or a corrupt archive rather than continuing with
# missing binaries.
if ! wget -q -P "$prom_tmp" \
    "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/${prom_pkg}.tar.gz" \
  || ! tar xzf "${prom_tmp}/${prom_pkg}.tar.gz" -C "$prom_tmp"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Prometheus ${PROM_VERSION}!${NC}" >&2
  rm -rf -- "$prom_tmp"
  exit 1
fi

cp "${prom_tmp}/${prom_pkg}/prometheus" "${prom_tmp}/${prom_pkg}/promtool" /usr/local/bin/
chown prometheus:prometheus /usr/local/bin/prometheus /usr/local/bin/promtool

mkdir -p /etc/prometheus /var/lib/prometheus

# The systemd unit points --web.console.templates / --web.console.libraries at
# these directories, so install them when the archive ships them.
for prom_console_dir in consoles console_libraries; do
  if [[ -d "${prom_tmp}/${prom_pkg}/${prom_console_dir}" ]]; then
    cp -r "${prom_tmp}/${prom_pkg}/${prom_console_dir}" /etc/prometheus/
  fi
done

chown -R prometheus:prometheus /etc/prometheus
chown prometheus:prometheus /var/lib/prometheus
rm -rf -- "$prom_tmp"
# Write the main Prometheus configuration.
# Jobs: Prometheus itself, the local node_exporter, a placeholder job for the
# other LXC containers, and HTTP probes routed through blackbox_exporter.
# The delimiter is quoted so the shell copies the text verbatim; YAML nesting
# is expressed by the indentation below and must be preserved.
cat > /etc/prometheus/prometheus.yml <<'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: 'proxmox-lxc-monitor'

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - localhost:9093

rule_files:
  - "/etc/prometheus/alerts.yml"

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
        labels:
          alias: 'Prometheus Server'
          type: 'monitoring'

  - job_name: 'monitoring-container'
    static_configs:
      - targets: ['localhost:9100']
        labels:
          alias: 'Monitoring LXC'
          type: 'lxc-container'

  - job_name: 'lxc-containers'
    static_configs:
      - targets: []
      # Добави тук IP адресите на другите контейнери:
      # - targets: ['10.0.0.101:9100']
      #   labels:
      #     alias: 'Web Server'
      #     type: 'lxc-container'
      # - targets: ['10.0.0.102:9100']
      #   labels:
      #     alias: 'Database Server'
      #     type: 'lxc-container'

  - job_name: 'blackbox'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          - http://localhost:9090
          - https://www.google.com
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9115
EOF
# Write the alerting rules: one always-firing test alert plus container
# health rules (down, CPU, memory, disk, load, network errors).
# The delimiter is quoted, so the {{ $labels... }} / {{ $value }} template
# variables reach the file untouched without needing backslash escapes.
cat > /etc/prometheus/alerts.yml <<'EOF'
groups:
  - name: test_alerts
    interval: 10s
    rules:
      - alert: TestAlert
        expr: up{job="prometheus"} == 1
        for: 5s
        labels:
          severity: info
        annotations:
          summary: "🎉 Proxmox мониторингът е активен!"
          description: "Prometheus е конфигуриран успешно и следи LXC контейнерите."

  - name: lxc_container_alerts
    interval: 30s
    rules:
      - alert: LXCContainerDown
        expr: up{type="lxc-container"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "⚠️ LXC контейнер {{ $labels.alias }} е DOWN"
          description: "Контейнер {{ $labels.instance }} не отговаря повече от 1 минута."

      - alert: HighCPUUsage
        expr: 100 - (avg by(instance, alias) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "🔥 Високо CPU натоварване"
          description: "{{ $labels.alias }} ({{ $labels.instance }}) използва {{ $value | humanize }}% CPU"

      - alert: HighMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "💾 Високо RAM използване"
          description: "{{ $labels.alias }} ({{ $labels.instance }}) използва {{ $value | humanize }}% RAM"

      - alert: DiskSpaceLow
        expr: (node_filesystem_avail_bytes{fstype!="tmpfs",mountpoint="/"} / node_filesystem_size_bytes{fstype!="tmpfs",mountpoint="/"}) * 100 < 20
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "💿 Малко дисково пространство"
          description: "{{ $labels.alias }} ({{ $labels.instance }}) има само {{ $value | humanize }}% свободно място"

      - alert: HighLoadAverage
        expr: node_load5 / count(node_cpu_seconds_total{mode="idle"}) without (cpu, mode) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "📊 Високо системно натоварване"
          description: "{{ $labels.alias }} има Load Average: {{ $value | humanize }}"

      - alert: NetworkErrors
        expr: rate(node_network_receive_errs_total[5m]) > 10 or rate(node_network_transmit_errs_total[5m]) > 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "🌐 Мрежови грешки"
          description: "{{ $labels.alias }} има {{ $value | humanize }} грешки/сек на интерфейс {{ $labels.device }}"
EOF

# Everything under /etc/prometheus must be readable by the service account.
chown -R prometheus:prometheus /etc/prometheus
# systemd unit for Prometheus.
# Added relative to the bare unit:
#   ExecReload - sends SIGHUP, which Prometheus handles as a configuration
#                reload, so `systemctl reload prometheus` works without
#                enabling the HTTP lifecycle API.
#   Restart    - bring the monitor back automatically after a crash.
# The delimiter is quoted so backslashes and $MAINPID are written literally.
cat > /etc/systemd/system/prometheus.service <<'EOF'
[Unit]
Description=Prometheus Monitoring for Proxmox LXC
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/bin/prometheus \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.path=/var/lib/prometheus/ \
  --web.console.templates=/etc/prometheus/consoles \
  --web.console.libraries=/etc/prometheus/console_libraries \
  --storage.tsdb.retention.time=30d
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF
# Install node_exporter for this container and register its systemd unit.
echo -e "${YELLOW}[4/9] Инсталация на Node Exporter...${NC}"
NODE_VERSION="1.6.1"
node_pkg="node_exporter-${NODE_VERSION}.linux-amd64"

# Private temporary directory: keeps the download out of the caller's current
# directory and makes cleanup safe.
node_tmp=$(mktemp -d) || {
  echo -e "${RED}Неуспешно създаване на временна директория!${NC}" >&2
  exit 1
}

# Stop on a failed download/extract instead of installing nothing silently.
if ! wget -q -P "$node_tmp" \
    "https://github.com/prometheus/node_exporter/releases/download/v${NODE_VERSION}/${node_pkg}.tar.gz" \
  || ! tar xzf "${node_tmp}/${node_pkg}.tar.gz" -C "$node_tmp"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Node Exporter ${NODE_VERSION}!${NC}" >&2
  rm -rf -- "$node_tmp"
  exit 1
fi

cp "${node_tmp}/${node_pkg}/node_exporter" /usr/local/bin/
chown node_exporter:node_exporter /usr/local/bin/node_exporter
rm -rf -- "$node_tmp"

cat > /etc/systemd/system/node_exporter.service <<'EOF'
[Unit]
Description=Node Exporter for LXC Container
Wants=network-online.target
After=network-online.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/usr/local/bin/node_exporter

[Install]
WantedBy=multi-user.target
EOF
# Install blackbox_exporter, write its probe modules and its systemd unit.
echo -e "${YELLOW}[5/9] Инсталация на Blackbox Exporter...${NC}"
BLACKBOX_VERSION="0.24.0"
blackbox_pkg="blackbox_exporter-${BLACKBOX_VERSION}.linux-amd64"

# Private temporary directory: keeps the download out of the caller's current
# directory and makes cleanup safe.
blackbox_tmp=$(mktemp -d) || {
  echo -e "${RED}Неуспешно създаване на временна директория!${NC}" >&2
  exit 1
}

# Stop on a failed download/extract instead of installing nothing silently.
if ! wget -q -P "$blackbox_tmp" \
    "https://github.com/prometheus/blackbox_exporter/releases/download/v${BLACKBOX_VERSION}/${blackbox_pkg}.tar.gz" \
  || ! tar xzf "${blackbox_tmp}/${blackbox_pkg}.tar.gz" -C "$blackbox_tmp"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Blackbox Exporter ${BLACKBOX_VERSION}!${NC}" >&2
  rm -rf -- "$blackbox_tmp"
  exit 1
fi

cp "${blackbox_tmp}/${blackbox_pkg}/blackbox_exporter" /usr/local/bin/
chown prometheus:prometheus /usr/local/bin/blackbox_exporter
rm -rf -- "$blackbox_tmp"

# Probe modules. YAML nesting is expressed by indentation and must be kept.
mkdir -p /etc/blackbox_exporter
cat > /etc/blackbox_exporter/config.yml <<'EOF'
modules:
  http_2xx:
    prober: http
    timeout: 5s
    http:
      valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
      valid_status_codes: []
      method: GET
      follow_redirects: true

  tcp_connect:
    prober: tcp
    timeout: 5s

  icmp:
    prober: icmp
    timeout: 5s
EOF

# The exporter runs under the prometheus account.
cat > /etc/systemd/system/blackbox_exporter.service <<'EOF'
[Unit]
Description=Blackbox Exporter
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/bin/blackbox_exporter --config.file=/etc/blackbox_exporter/config.yml

[Install]
WantedBy=multi-user.target
EOF
# Install the Alertmanager release binaries (alertmanager, amtool) and create
# its configuration and state directories.
echo -e "${YELLOW}[6/9] Инсталация на Alertmanager...${NC}"
ALERT_VERSION="0.26.0"
alert_pkg="alertmanager-${ALERT_VERSION}.linux-amd64"

# Private temporary directory: keeps the download out of the caller's current
# directory and makes cleanup safe.
alert_tmp=$(mktemp -d) || {
  echo -e "${RED}Неуспешно създаване на временна директория!${NC}" >&2
  exit 1
}

# Stop on a failed download/extract instead of installing nothing silently.
if ! wget -q -P "$alert_tmp" \
    "https://github.com/prometheus/alertmanager/releases/download/v${ALERT_VERSION}/${alert_pkg}.tar.gz" \
  || ! tar xzf "${alert_tmp}/${alert_pkg}.tar.gz" -C "$alert_tmp"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Alertmanager ${ALERT_VERSION}!${NC}" >&2
  rm -rf -- "$alert_tmp"
  exit 1
fi

cp "${alert_tmp}/${alert_pkg}/alertmanager" "${alert_tmp}/${alert_pkg}/amtool" /usr/local/bin/
chown alertmanager:alertmanager /usr/local/bin/alertmanager /usr/local/bin/amtool
rm -rf -- "$alert_tmp"

mkdir -p /etc/alertmanager /var/lib/alertmanager
chown -R alertmanager:alertmanager /etc/alertmanager /var/lib/alertmanager
# Write the Alertmanager configuration with a single Telegram receiver.
# The file contains the bot token, so it is created empty with mode 0600 and
# the service account as owner BEFORE the secret is written into it; the
# redirection below truncates the file but keeps that mode and owner.
install -o alertmanager -g alertmanager -m 0600 /dev/null /etc/alertmanager/alertmanager.yml

# Unquoted delimiter on purpose: ${TELEGRAM_TOKEN} and ${TELEGRAM_CHAT_ID} are
# expanded by the shell. The {{ ... }} notification template contains no '$'
# and is therefore copied as-is. YAML nesting is expressed by indentation.
cat > /etc/alertmanager/alertmanager.yml <<EOF
global:
  resolve_timeout: 5m

route:
  group_by: ['alertname', 'instance', 'alias']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 12h
  receiver: 'telegram'

receivers:
  - name: 'telegram'
    telegram_configs:
      - bot_token: '${TELEGRAM_TOKEN}'
        chat_id: ${TELEGRAM_CHAT_ID}
        parse_mode: 'HTML'
        message: |
          <b>{{ .Status | toUpper }}</b> - Proxmox LXC Monitor
          {{ range .Alerts }}
          <b>🏷 Alert:</b> {{ .Labels.alertname }}
          <b>📍 Container:</b> {{ .Labels.alias }} ({{ .Labels.instance }})
          <b>🔴 Severity:</b> {{ .Labels.severity }}
          <b>📝 Summary:</b> {{ .Annotations.summary }}
          <b>ℹ️ Details:</b> {{ .Annotations.description }}
          <b>⏰ Started:</b> {{ .StartsAt.Format "02.01.2006 15:04:05" }}
          {{ end }}

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'instance']
EOF

chown -R alertmanager:alertmanager /etc/alertmanager
# systemd unit that runs Alertmanager as the alertmanager user with the
# configuration and storage paths used by this installer. The here-doc
# delimiter is quoted, so the line-continuation backslashes are written
# literally and need no escaping.
alertmanager_unit=/etc/systemd/system/alertmanager.service
cat <<'UNIT' > "$alertmanager_unit"
[Unit]
Description=Alertmanager for Proxmox LXC
Wants=network-online.target
After=network-online.target
[Service]
User=alertmanager
Group=alertmanager
Type=simple
ExecStart=/usr/local/bin/alertmanager \
--config.file=/etc/alertmanager/alertmanager.yml \
--storage.path=/var/lib/alertmanager/
[Install]
WantedBy=multi-user.target
UNIT
# Install Grafana from the vendor APT repository.
echo -e "${YELLOW}[7/9] Инсталация на Grafana...${NC}"

# Helper packages are best-effort: their failure is deliberately ignored.
apt-get install -y apt-transport-https software-properties-common > /dev/null 2>&1

# Without the signing key the repository cannot be verified, so stop here
# rather than letting the later install fail with no explanation.
if ! wget -q -O /usr/share/keyrings/grafana.key https://apt.grafana.com/gpg.key; then
  echo -e "${RED}Неуспешно изтегляне на GPG ключа на Grafana!${NC}" >&2
  exit 1
fi
echo "deb [signed-by=/usr/share/keyrings/grafana.key] https://apt.grafana.com stable main" \
  > /etc/apt/sources.list.d/grafana.list

apt-get update -qq
# Only stdout is silenced so apt's error text stays visible on failure.
if ! DEBIAN_FRONTEND=noninteractive apt-get install -y grafana > /dev/null; then
  echo -e "${RED}Неуспешна инсталация на Grafana!${NC}" >&2
  exit 1
fi
# Provision the Prometheus datasource for Grafana.
# YAML nesting is expressed by indentation and must be preserved.
mkdir -p /etc/grafana/provisioning/datasources
cat > /etc/grafana/provisioning/datasources/prometheus.yml <<'EOF'
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://localhost:9090
    isDefault: true
    editable: true
    jsonData:
      timeInterval: "15s"
EOF

# Provision a file-based dashboard provider that loads every dashboard found
# in /var/lib/grafana/dashboards into the 'LXC Containers' folder.
mkdir -p /etc/grafana/provisioning/dashboards
cat > /etc/grafana/provisioning/dashboards/default.yml <<'EOF'
apiVersion: 1

providers:
  - name: 'Proxmox LXC'
    orgId: 1
    folder: 'LXC Containers'
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /var/lib/grafana/dashboards
EOF

mkdir -p /var/lib/grafana/dashboards
# Download the community dashboards (grafana.com ids 1860 and 2) into the
# provisioned dashboard directory.
echo -e "${YELLOW}[8/9] Изтегляне на Grafana dashboards...${NC}"
dashboards_dir=/var/lib/grafana/dashboards

# Each entry is <file name without .json>:<grafana.com dashboard id>.
for dash_spec in node-exporter:1860 prometheus-stats:2; do
  dash_file="${dashboards_dir}/${dash_spec%%:*}.json"
  dash_url="https://grafana.com/api/dashboards/${dash_spec##*:}/revisions/latest/download"
  # `wget -O` creates the output file even when the download fails; remove an
  # empty/failed file so Grafana is not handed invalid JSON. A missing
  # dashboard is not fatal for the stack, so only warn.
  if ! wget -q -O "$dash_file" "$dash_url" || [[ ! -s "$dash_file" ]]; then
    rm -f -- "$dash_file"
    echo -e "${YELLOW}Предупреждение: неуспешно изтегляне на dashboard ${dash_spec%%:*} (${dash_url})${NC}" >&2
  fi
done

chown -R grafana:grafana "$dashboards_dir"
# Make systemd pick up the unit files written above, enable all five services
# at boot, then start them one by one: exporters first, then Prometheus,
# Alertmanager and Grafana.
echo -e "${YELLOW}[9/9] Стартиране на сервизите...${NC}"
systemctl daemon-reload
systemctl enable prometheus node_exporter blackbox_exporter alertmanager grafana-server > /dev/null 2>&1
start_order=(node_exporter blackbox_exporter prometheus alertmanager grafana-server)
for stack_unit in "${start_order[@]}"; do
  systemctl start "$stack_unit"
done
# Give the daemons a few seconds to come up before querying their state.
sleep 5

# Report each service as Running/Failed. ALL_OK starts as true and is flipped
# to false by the first inactive service; the script's final exit status is
# derived from it.
echo -e "\n${BLUE}Проверка на статуса на сервизите...${NC}\n"
SERVICES=("prometheus" "node_exporter" "blackbox_exporter" "alertmanager" "grafana-server")
ALL_OK=true
for service in "${SERVICES[@]}"; do
  if ! systemctl is-active --quiet "$service"; then
    echo -e "${RED}✗${NC} $service: Failed"
    ALL_OK=false
    continue
  fi
  echo -e "${GREEN}✓${NC} $service: Running"
done
# Ask Prometheus to re-read its configuration and rules so the test alert is
# evaluated right away.
# The HTTP endpoint POST /-/reload only works when Prometheus runs with
# --web.enable-lifecycle, which the unit written by this script does not set,
# so the reload is requested with SIGHUP instead.
echo -e "\n${YELLOW}Изпращане на тестов alert...${NC}"
sleep 3
if ! systemctl kill --signal=SIGHUP prometheus 2>/dev/null; then
  # Not fatal: the service status is reported separately.
  echo -e "${YELLOW}Предупреждение: Prometheus не можа да бъде презареден (SIGHUP).${NC}" >&2
fi
# Final summary: outcome banner, service URLs, config file locations and the
# next manual steps.
echo -e "\n${GREEN}================================${NC}"
# ALL_OK is set by the service status check earlier in the script. Only claim
# success when every service is active; an unset value is treated as success
# to keep the previous behaviour.
if [[ "${ALL_OK:-true}" == true ]]; then
  echo -e "${GREEN}Инсталацията завърши успешно!${NC}"
else
  echo -e "${RED}Инсталацията завърши, но някои сервизи не работят!${NC}"
fi
echo -e "${GREEN}================================${NC}\n"

# First address reported by `hostname -I`; fall back to localhost when the
# command prints nothing so the URLs below stay well-formed.
SERVER_IP=$(hostname -I | awk '{print $1}')
SERVER_IP=${SERVER_IP:-localhost}

echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║      Proxmox LXC Monitoring Stack      ║${NC}"
echo -e "${BLUE}╚════════════════════════════════════════╝${NC}\n"

echo -e "${YELLOW}🌐 Достъп до услугите:${NC}"
echo -e "  Prometheus:      http://${SERVER_IP}:9090"
echo -e "  Alertmanager:    http://${SERVER_IP}:9093"
echo -e "  Grafana:         http://${SERVER_IP}:3000"
echo -e "    └─ User: ${GREEN}admin${NC}"
echo -e "    └─ Pass: ${GREEN}admin${NC}"
echo -e "  Node Exporter:   http://${SERVER_IP}:9100/metrics"
echo -e "  Blackbox Export: http://${SERVER_IP}:9115"

echo -e "\n${YELLOW}📁 Конфигурационни файлове:${NC}"
echo -e "  Prometheus:   /etc/prometheus/prometheus.yml"
echo -e "  Alerts:       /etc/prometheus/alerts.yml"
echo -e "  Alertmanager: /etc/alertmanager/alertmanager.yml"

echo -e "\n${YELLOW}📋 Следващи стъпки:${NC}"
echo -e "  1. Инсталирай Node Exporter в другите LXC контейнери"
echo -e "  2. Добави IP адресите им в: ${GREEN}/etc/prometheus/prometheus.yml${NC}"
echo -e "  3. Рестартирай Prometheus: ${GREEN}systemctl restart prometheus${NC}"

echo -e "\n${GREEN}🎉 Тестов alert ще бъде изпратен в Telegram след 5-10 секунди!${NC}"
echo -e "${YELLOW}📱 Telegram Chat ID: ${TELEGRAM_CHAT_ID}${NC}\n"
# Install the helper command `add-lxc-container <IP> <Alias>`, which registers
# another container's node_exporter (port 9100) in prometheus.yml.
# The delimiter is quoted: the helper's own $variables must be written
# literally, not expanded by this installer.
cat > /usr/local/bin/add-lxc-container <<'ADDSCRIPT'
#!/bin/bash
# add-lxc-container <IP> <Alias>
#
# Inserts a new static target "<IP>:9100" with labels alias=<Alias> and
# type=lxc-container in front of the "- targets: []" placeholder in the
# Prometheus configuration, validates the result, then restarts Prometheus.
if [ $# -ne 2 ]; then
  echo "Usage: add-lxc-container <IP> <Alias>"
  echo "Example: add-lxc-container 10.0.0.101 'Web Server'"
  exit 1
fi

IP=$1
ALIAS=$2
CONFIG=/etc/prometheus/prometheus.yml

# The address is embedded in a YAML flow sequence, so only plain IPv4
# addresses / host names are accepted.
if [[ ! "$IP" =~ ^[A-Za-z0-9][A-Za-z0-9.-]*$ ]]; then
  echo "Error: invalid IP address or host name: $IP" >&2
  exit 1
fi
if [[ -z "$ALIAS" || "$ALIAS" == *$'\n'* ]]; then
  echo "Error: alias must be a non-empty single line" >&2
  exit 1
fi
if grep -qF -- "'${IP}:9100'" "$CONFIG"; then
  echo "Error: ${IP}:9100 is already listed in $CONFIG" >&2
  exit 1
fi

# Inside a YAML single-quoted scalar a quote is escaped by doubling it.
SQ="'"
export LXC_TARGET="${IP}:9100"
export LXC_ALIAS=${ALIAS//$SQ/$SQ$SQ}

# Build the new configuration in a temporary file next to the real one; the
# live file is only overwritten after the result has been validated.
TMP=$(mktemp "${CONFIG}.XXXXXX") || exit 1
trap 'rm -f -- "$TMP"' EXIT

# Values are passed through the environment (not spliced into the program
# text), and the indentation is copied from the placeholder line itself so the
# new entry always lines up with it.
if ! awk -v q="'" '
  !inserted && /- targets: \[\]/ {
    pad = $0
    sub(/- targets:.*/, "", pad)
    printf "%s- targets: [%s%s%s]\n", pad, q, ENVIRON["LXC_TARGET"], q
    printf "%s  labels:\n", pad
    printf "%s    alias: %s%s%s\n", pad, q, ENVIRON["LXC_ALIAS"], q
    printf "%s    type: %slxc-container%s\n", pad, q, q
    inserted = 1
  }
  { print }
  END { if (!inserted) exit 1 }
' "$CONFIG" > "$TMP"; then
  echo "Error: placeholder '- targets: []' not found in $CONFIG" >&2
  exit 1
fi

# Never restart Prometheus with a configuration it cannot load.
if command -v promtool > /dev/null && ! promtool check config "$TMP" > /dev/null; then
  echo "Error: generated configuration failed 'promtool check config'; $CONFIG left unchanged" >&2
  exit 1
fi

# Overwrite in place so the file keeps its owner and permissions.
cat "$TMP" > "$CONFIG"

systemctl restart prometheus
echo "✓ Добавен контейнер: $ALIAS ($IP)"
echo "Провери в Prometheus: http://localhost:9090/targets"
ADDSCRIPT
chmod +x /usr/local/bin/add-lxc-container
echo -e "${GREEN}✓ Създаден helper скрипт: ${YELLOW}add-lxc-container${NC}"
echo -e "  Използвай: ${GREEN}add-lxc-container 10.0.0.101 'Web Server'${NC}\n"
# The exit status mirrors the service health check: 1 (with a hint to look at
# the logs) unless ALL_OK is exactly "true", 0 otherwise.
if [ "$ALL_OK" != true ]; then
  echo -e "${RED}Някои сервизи не стартираха правилно. Проверете логовете.${NC}"
  exit 1
fi
exit 0
| 1 | #!/bin/bash |
| 2 | |
| 3 | # Цветове за по-добър изглед |
| 4 | RED='\033[0;31m' |
| 5 | GREEN='\033[0;32m' |
| 6 | YELLOW='\033[1;33m' |
| 7 | BLUE='\033[0;34m' |
| 8 | NC='\033[0m' # No Color |
| 9 | |
| 10 | echo -e "${GREEN}================================${NC}" |
| 11 | echo -e "${GREEN}Proxmox LXC Monitoring Stack${NC}" |
| 12 | echo -e "${GREEN}================================${NC}\n" |
| 13 | |
| 14 | # Проверка за root права |
| 15 | if [[ $EUID -ne 0 ]]; then |
| 16 | echo -e "${RED}Този скрипт трябва да се изпълни като root!${NC}" |
| 17 | exit 1 |
| 18 | fi |
| 19 | |
| 20 | # Въвеждане на Telegram данни |
| 21 | echo -e "${YELLOW}Моля въведете Telegram Bot Token:${NC}" |
| 22 | read -r TELEGRAM_TOKEN |
| 23 | |
| 24 | echo -e "${YELLOW}Моля въведете Telegram Chat ID (БЕЗ кавички):${NC}" |
| 25 | read -r TELEGRAM_CHAT_ID |
| 26 | |
| 27 | # Проверка дали са въведени |
| 28 | if [ -z "$TELEGRAM_TOKEN" ] || [ -z "$TELEGRAM_CHAT_ID" ]; then |
| 29 | echo -e "${RED}Telegram данните са задължителни!${NC}" |
| 30 | exit 1 |
| 31 | fi |
| 32 | |
| 33 | echo -e "\n${GREEN}Започва инсталацията в LXC контейнера...${NC}\n" |
| 34 | |
| 35 | # Актуализация на системата |
| 36 | echo -e "${YELLOW}[1/9] Актуализация на системата...${NC}" |
| 37 | apt-get update -qq |
| 38 | apt-get install -y wget curl tar net-tools > /dev/null 2>&1 |
| 39 | |
| 40 | # Създаване на потребители |
| 41 | echo -e "${YELLOW}[2/9] Създаване на системни потребители...${NC}" |
| 42 | useradd --no-create-home --shell /bin/false prometheus 2>/dev/null || true |
| 43 | useradd --no-create-home --shell /bin/false node_exporter 2>/dev/null || true |
| 44 | useradd --no-create-home --shell /bin/false alertmanager 2>/dev/null || true |
| 45 | |
| 46 | # Инсталация на Prometheus |
| 47 | echo -e "${YELLOW}[3/9] Инсталация на Prometheus...${NC}" |
| 48 | PROM_VERSION="2.47.0" |
| 49 | wget -q https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.linux-amd64.tar.gz |
| 50 | tar xzf prometheus-${PROM_VERSION}.linux-amd64.tar.gz |
| 51 | cp prometheus-${PROM_VERSION}.linux-amd64/prometheus /usr/local/bin/ |
| 52 | cp prometheus-${PROM_VERSION}.linux-amd64/promtool /usr/local/bin/ |
| 53 | chown prometheus:prometheus /usr/local/bin/prometheus /usr/local/bin/promtool |
| 54 | rm -rf prometheus-${PROM_VERSION}.linux-amd64* |
| 55 | |
| 56 | mkdir -p /etc/prometheus /var/lib/prometheus |
| 57 | chown prometheus:prometheus /etc/prometheus /var/lib/prometheus |
| 58 | |
| 59 | # Конфигурация на Prometheus за LXC контейнери |
| 60 | cat > /etc/prometheus/prometheus.yml <<EOF |
| 61 | global: |
| 62 | scrape_interval: 15s |
| 63 | evaluation_interval: 15s |
| 64 | external_labels: |
| 65 | monitor: 'proxmox-lxc-monitor' |
| 66 | |
| 67 | alerting: |
| 68 | alertmanagers: |
| 69 | - static_configs: |
| 70 | - targets: |
| 71 | - localhost:9093 |
| 72 | |
| 73 | rule_files: |
| 74 | - "/etc/prometheus/alerts.yml" |
| 75 | |
| 76 | scrape_configs: |
| 77 | - job_name: 'prometheus' |
| 78 | static_configs: |
| 79 | - targets: ['localhost:9090'] |
| 80 | labels: |
| 81 | alias: 'Prometheus Server' |
| 82 | type: 'monitoring' |
| 83 | |
| 84 | - job_name: 'monitoring-container' |
| 85 | static_configs: |
| 86 | - targets: ['localhost:9100'] |
| 87 | labels: |
| 88 | alias: 'Monitoring LXC' |
| 89 | type: 'lxc-container' |
| 90 | |
| 91 | - job_name: 'lxc-containers' |
| 92 | static_configs: |
| 93 | - targets: [] |
| 94 | # Добави тук IP адресите на другите контейнери: |
| 95 | # - targets: ['10.0.0.101:9100'] |
| 96 | # labels: |
| 97 | # alias: 'Web Server' |
| 98 | # type: 'lxc-container' |
| 99 | # - targets: ['10.0.0.102:9100'] |
| 100 | # labels: |
| 101 | # alias: 'Database Server' |
| 102 | # type: 'lxc-container' |
| 103 | |
| 104 | - job_name: 'blackbox' |
| 105 | metrics_path: /probe |
| 106 | params: |
| 107 | module: [http_2xx] |
| 108 | static_configs: |
| 109 | - targets: |
| 110 | - http://localhost:9090 |
| 111 | - https://www.google.com |
| 112 | relabel_configs: |
| 113 | - source_labels: [__address__] |
| 114 | target_label: __param_target |
| 115 | - source_labels: [__param_target] |
| 116 | target_label: instance |
| 117 | - target_label: __address__ |
| 118 | replacement: localhost:9115 |
| 119 | EOF |
| 120 | |
| 121 | # Създаване на alert правила |
| 122 | cat > /etc/prometheus/alerts.yml <<EOF |
| 123 | groups: |
| 124 | - name: test_alerts |
| 125 | interval: 10s |
| 126 | rules: |
| 127 | - alert: TestAlert |
| 128 | expr: up{job="prometheus"} == 1 |
| 129 | for: 5s |
| 130 | labels: |
| 131 | severity: info |
| 132 | annotations: |
| 133 | summary: "🎉 Proxmox мониторингът е активен!" |
| 134 | description: "Prometheus е конфигуриран успешно и следи LXC контейнерите." |
| 135 | |
| 136 | - name: lxc_container_alerts |
| 137 | interval: 30s |
| 138 | rules: |
| 139 | - alert: LXCContainerDown |
| 140 | expr: up{type="lxc-container"} == 0 |
| 141 | for: 1m |
| 142 | labels: |
| 143 | severity: critical |
| 144 | annotations: |
| 145 | summary: "⚠️ LXC контейнер {{ \$labels.alias }} е DOWN" |
| 146 | description: "Контейнер {{ \$labels.instance }} не отговаря повече от 1 минута." |
| 147 | |
| 148 | - alert: HighCPUUsage |
| 149 | expr: 100 - (avg by(instance, alias) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80 |
| 150 | for: 2m |
| 151 | labels: |
| 152 | severity: warning |
| 153 | annotations: |
| 154 | summary: "🔥 Високо CPU натоварване" |
| 155 | description: "{{ \$labels.alias }} ({{ \$labels.instance }}) използва {{ \$value | humanize }}% CPU" |
| 156 | |
| 157 | - alert: HighMemoryUsage |
| 158 | expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 80 |
| 159 | for: 2m |
| 160 | labels: |
| 161 | severity: warning |
| 162 | annotations: |
| 163 | summary: "💾 Високо RAM използване" |
| 164 | description: "{{ \$labels.alias }} ({{ \$labels.instance }}) използва {{ \$value | humanize }}% RAM" |
| 165 | |
| 166 | - alert: DiskSpaceLow |
| 167 | expr: (node_filesystem_avail_bytes{fstype!="tmpfs",mountpoint="/"} / node_filesystem_size_bytes{fstype!="tmpfs",mountpoint="/"}) * 100 < 20 |
| 168 | for: 2m |
| 169 | labels: |
| 170 | severity: warning |
| 171 | annotations: |
| 172 | summary: "💿 Малко дисково пространство" |
| 173 | description: "{{ \$labels.alias }} ({{ \$labels.instance }}) има само {{ \$value | humanize }}% свободно място" |
| 174 | |
| 175 | - alert: HighLoadAverage |
| 176 | expr: node_load5 / count(node_cpu_seconds_total{mode="idle"}) without (cpu, mode) > 2 |
| 177 | for: 5m |
| 178 | labels: |
| 179 | severity: warning |
| 180 | annotations: |
| 181 | summary: "📊 Високо системно натоварване" |
| 182 | description: "{{ \$labels.alias }} има Load Average: {{ \$value | humanize }}" |
| 183 | |
| 184 | - alert: NetworkErrors |
| 185 | expr: rate(node_network_receive_errs_total[5m]) > 10 or rate(node_network_transmit_errs_total[5m]) > 10 |
| 186 | for: 2m |
| 187 | labels: |
| 188 | severity: warning |
| 189 | annotations: |
| 190 | summary: "🌐 Мрежови грешки" |
| 191 | description: "{{ \$labels.alias }} има {{ \$value | humanize }} грешки/сек на интерфейс {{ \$labels.device }}" |
| 192 | EOF |
| 193 | |
| 194 | chown -R prometheus:prometheus /etc/prometheus |
| 195 | |
| 196 | # Systemd service за Prometheus |
| 197 | cat > /etc/systemd/system/prometheus.service <<EOF |
| 198 | [Unit] |
| 199 | Description=Prometheus Monitoring for Proxmox LXC |
| 200 | Wants=network-online.target |
| 201 | After=network-online.target |
| 202 | |
| 203 | [Service] |
| 204 | User=prometheus |
| 205 | Group=prometheus |
| 206 | Type=simple |
| 207 | ExecStart=/usr/local/bin/prometheus \\ |
| 208 | --config.file=/etc/prometheus/prometheus.yml \\ |
| 209 | --storage.tsdb.path=/var/lib/prometheus/ \\ |
| 210 | --web.console.templates=/etc/prometheus/consoles \\ |
| 211 | --web.console.libraries=/etc/prometheus/console_libraries \\ |
| 212 | --storage.tsdb.retention.time=30d |
| 213 | |
| 214 | [Install] |
| 215 | WantedBy=multi-user.target |
| 216 | EOF |
| 217 | |
| 218 | # Инсталация на Node Exporter за този контейнер |
| 219 | echo -e "${YELLOW}[4/9] Инсталация на Node Exporter...${NC}" |
| 220 | NODE_VERSION="1.6.1" |
| 221 | wget -q https://github.com/prometheus/node_exporter/releases/download/v${NODE_VERSION}/node_exporter-${NODE_VERSION}.linux-amd64.tar.gz |
| 222 | tar xzf node_exporter-${NODE_VERSION}.linux-amd64.tar.gz |
| 223 | cp node_exporter-${NODE_VERSION}.linux-amd64/node_exporter /usr/local/bin/ |
| 224 | chown node_exporter:node_exporter /usr/local/bin/node_exporter |
| 225 | rm -rf node_exporter-${NODE_VERSION}.linux-amd64* |
| 226 | |
| 227 | cat > /etc/systemd/system/node_exporter.service <<EOF |
| 228 | [Unit] |
| 229 | Description=Node Exporter for LXC Container |
| 230 | Wants=network-online.target |
| 231 | After=network-online.target |
| 232 | |
| 233 | [Service] |
| 234 | User=node_exporter |
| 235 | Group=node_exporter |
| 236 | Type=simple |
| 237 | ExecStart=/usr/local/bin/node_exporter |
| 238 | |
| 239 | [Install] |
| 240 | WantedBy=multi-user.target |
| 241 | EOF |
| 242 | |
| 243 | # Инсталация на Blackbox Exporter |
| 244 | echo -e "${YELLOW}[5/9] Инсталация на Blackbox Exporter...${NC}" |
| 245 | BLACKBOX_VERSION="0.24.0" |
| 246 | wget -q https://github.com/prometheus/blackbox_exporter/releases/download/v${BLACKBOX_VERSION}/blackbox_exporter-${BLACKBOX_VERSION}.linux-amd64.tar.gz |
| 247 | tar xzf blackbox_exporter-${BLACKBOX_VERSION}.linux-amd64.tar.gz |
| 248 | cp blackbox_exporter-${BLACKBOX_VERSION}.linux-amd64/blackbox_exporter /usr/local/bin/ |
| 249 | chown prometheus:prometheus /usr/local/bin/blackbox_exporter |
| 250 | rm -rf blackbox_exporter-${BLACKBOX_VERSION}.linux-amd64* |
| 251 | |
| 252 | mkdir -p /etc/blackbox_exporter |
| 253 | cat > /etc/blackbox_exporter/config.yml <<EOF |
| 254 | modules: |
| 255 | http_2xx: |
| 256 | prober: http |
| 257 | timeout: 5s |
| 258 | http: |
| 259 | valid_http_versions: ["HTTP/1.1", "HTTP/2.0"] |
| 260 | valid_status_codes: [] |
| 261 | method: GET |
| 262 | follow_redirects: true |
| 263 | |
| 264 | tcp_connect: |
| 265 | prober: tcp |
| 266 | timeout: 5s |
| 267 | |
| 268 | icmp: |
| 269 | prober: icmp |
| 270 | timeout: 5s |
| 271 | EOF |
| 272 | |
| 273 | cat > /etc/systemd/system/blackbox_exporter.service <<EOF |
| 274 | [Unit] |
| 275 | Description=Blackbox Exporter |
| 276 | Wants=network-online.target |
| 277 | After=network-online.target |
| 278 | |
| 279 | [Service] |
| 280 | User=prometheus |
| 281 | Group=prometheus |
| 282 | Type=simple |
| 283 | ExecStart=/usr/local/bin/blackbox_exporter --config.file=/etc/blackbox_exporter/config.yml |
| 284 | |
| 285 | [Install] |
| 286 | WantedBy=multi-user.target |
| 287 | EOF |
| 288 | |
| 289 | # Инсталация на Alertmanager |
| 290 | echo -e "${YELLOW}[6/9] Инсталация на Alertmanager...${NC}" |
| 291 | ALERT_VERSION="0.26.0" |
| 292 | wget -q https://github.com/prometheus/alertmanager/releases/download/v${ALERT_VERSION}/alertmanager-${ALERT_VERSION}.linux-amd64.tar.gz |
| 293 | tar xzf alertmanager-${ALERT_VERSION}.linux-amd64.tar.gz |
| 294 | cp alertmanager-${ALERT_VERSION}.linux-amd64/alertmanager /usr/local/bin/ |
| 295 | cp alertmanager-${ALERT_VERSION}.linux-amd64/amtool /usr/local/bin/ |
| 296 | chown alertmanager:alertmanager /usr/local/bin/alertmanager /usr/local/bin/amtool |
| 297 | rm -rf alertmanager-${ALERT_VERSION}.linux-amd64* |
| 298 | |
| 299 | mkdir -p /etc/alertmanager /var/lib/alertmanager |
| 300 | chown -R alertmanager:alertmanager /etc/alertmanager /var/lib/alertmanager |
| 301 | |
| 302 | # Конфигурация на Alertmanager с Telegram |
| 303 | cat > /etc/alertmanager/alertmanager.yml <<EOF |
| 304 | global: |
| 305 | resolve_timeout: 5m |
| 306 | |
| 307 | route: |
| 308 | group_by: ['alertname', 'instance', 'alias'] |
| 309 | group_wait: 10s |
| 310 | group_interval: 10s |
| 311 | repeat_interval: 12h |
| 312 | receiver: 'telegram' |
| 313 | |
| 314 | receivers: |
| 315 | - name: 'telegram' |
| 316 | telegram_configs: |
| 317 | - bot_token: '${TELEGRAM_TOKEN}' |
| 318 | chat_id: ${TELEGRAM_CHAT_ID} |
| 319 | parse_mode: 'HTML' |
| 320 | message: | |
| 321 | <b>{{ .Status | toUpper }}</b> - Proxmox LXC Monitor |
| 322 | {{ range .Alerts }} |
| 323 | <b>🏷 Alert:</b> {{ .Labels.alertname }} |
| 324 | <b>📍 Container:</b> {{ .Labels.alias }} ({{ .Labels.instance }}) |
| 325 | <b>🔴 Severity:</b> {{ .Labels.severity }} |
| 326 | <b>📝 Summary:</b> {{ .Annotations.summary }} |
| 327 | <b>ℹ️ Details:</b> {{ .Annotations.description }} |
| 328 | <b>⏰ Started:</b> {{ .StartsAt.Format "02.01.2006 15:04:05" }} |
| 329 | {{ end }} |
| 330 | |
| 331 | inhibit_rules: |
| 332 | - source_match: |
| 333 | severity: 'critical' |
| 334 | target_match: |
| 335 | severity: 'warning' |
| 336 | equal: ['alertname', 'instance'] |
| 337 | EOF |
| 338 | |
| 339 | chown -R alertmanager:alertmanager /etc/alertmanager |
| 340 | |
| 341 | cat > /etc/systemd/system/alertmanager.service <<EOF |
| 342 | [Unit] |
| 343 | Description=Alertmanager for Proxmox LXC |
| 344 | Wants=network-online.target |
| 345 | After=network-online.target |
| 346 | |
| 347 | [Service] |
| 348 | User=alertmanager |
| 349 | Group=alertmanager |
| 350 | Type=simple |
| 351 | ExecStart=/usr/local/bin/alertmanager \\ |
| 352 | --config.file=/etc/alertmanager/alertmanager.yml \\ |
| 353 | --storage.path=/var/lib/alertmanager/ |
| 354 | |
| 355 | [Install] |
| 356 | WantedBy=multi-user.target |
| 357 | EOF |
| 358 | |
| 359 | # Инсталация на Grafana |
| 360 | echo -e "${YELLOW}[7/9] Инсталация на Grafana...${NC}" |
| 361 | apt-get install -y apt-transport-https software-properties-common > /dev/null 2>&1 |
| 362 | wget -q -O /usr/share/keyrings/grafana.key https://apt.grafana.com/gpg.key |
| 363 | echo "deb [signed-by=/usr/share/keyrings/grafana.key] https://apt.grafana.com stable main" | tee /etc/apt/sources.list.d/grafana.list > /dev/null |
| 364 | apt-get update -qq |
| 365 | apt-get install -y grafana > /dev/null 2>&1 |
| 366 | |
| 367 | # Конфигурация на Grafana datasource |
| 368 | mkdir -p /etc/grafana/provisioning/datasources |
| 369 | cat > /etc/grafana/provisioning/datasources/prometheus.yml <<EOF |
| 370 | apiVersion: 1 |
| 371 | |
| 372 | datasources: |
| 373 | - name: Prometheus |
| 374 | type: prometheus |
| 375 | access: proxy |
| 376 | url: http://localhost:9090 |
| 377 | isDefault: true |
| 378 | editable: true |
| 379 | jsonData: |
| 380 | timeInterval: "15s" |
| 381 | EOF |
| 382 | |
| 383 | # Конфигурация на Grafana dashboards |
| 384 | mkdir -p /etc/grafana/provisioning/dashboards |
| 385 | cat > /etc/grafana/provisioning/dashboards/default.yml <<EOF |
| 386 | apiVersion: 1 |
| 387 | |
| 388 | providers: |
| 389 | - name: 'Proxmox LXC' |
| 390 | orgId: 1 |
| 391 | folder: 'LXC Containers' |
| 392 | type: file |
| 393 | disableDeletion: false |
| 394 | updateIntervalSeconds: 10 |
| 395 | allowUiUpdates: true |
| 396 | options: |
| 397 | path: /var/lib/grafana/dashboards |
| 398 | EOF |
| 399 | |
| 400 | mkdir -p /var/lib/grafana/dashboards |
| 401 | |
| 402 | # Изтегляне на dashboards |
| 403 | echo -e "${YELLOW}[8/9] Изтегляне на Grafana dashboards...${NC}" |
| 404 | wget -q -O /var/lib/grafana/dashboards/node-exporter.json https://grafana.com/api/dashboards/1860/revisions/latest/download |
| 405 | wget -q -O /var/lib/grafana/dashboards/prometheus-stats.json https://grafana.com/api/dashboards/2/revisions/latest/download |
| 406 | |
| 407 | chown -R grafana:grafana /var/lib/grafana/dashboards |
| 408 | |
| 409 | # Стартиране на всички сервизи |
| 410 | echo -e "${YELLOW}[9/9] Стартиране на сервизите...${NC}" |
| 411 | systemctl daemon-reload |
| 412 | |
| 413 | systemctl enable prometheus node_exporter blackbox_exporter alertmanager grafana-server > /dev/null 2>&1 |
| 414 | |
| 415 | systemctl start node_exporter |
| 416 | systemctl start blackbox_exporter |
| 417 | systemctl start prometheus |
| 418 | systemctl start alertmanager |
| 419 | systemctl start grafana-server |
| 420 | |
| 421 | # Изчакване за стартиране |
| 422 | sleep 5 |
| 423 | |
| 424 | # Проверка на статуса |
| 425 | echo -e "\n${BLUE}Проверка на статуса на сервизите...${NC}\n" |
| 426 | SERVICES=("prometheus" "node_exporter" "blackbox_exporter" "alertmanager" "grafana-server") |
| 427 | ALL_OK=true |
| 428 | |
| 429 | for service in "${SERVICES[@]}"; do |
| 430 | if systemctl is-active --quiet $service; then |
| 431 | echo -e "${GREEN}✓${NC} $service: Running" |
| 432 | else |
| 433 | echo -e "${RED}✗${NC} $service: Failed" |
| 434 | ALL_OK=false |
| 435 | fi |
| 436 | done |
| 437 | |
| 438 | # Тригване на тестов alert |
| 439 | echo -e "\n${YELLOW}Изпращане на тестов alert...${NC}" |
| 440 | sleep 3 |
| 441 | curl -X POST http://localhost:9090/-/reload 2>/dev/null |
| 442 | |
| 443 | # Финален изход |
| 444 | echo -e "\n${GREEN}================================${NC}" |
| 445 | echo -e "${GREEN}Инсталацията завърши успешно!${NC}" |
| 446 | echo -e "${GREEN}================================${NC}\n" |
| 447 | |
| 448 | SERVER_IP=$(hostname -I | awk '{print $1}') |
| 449 | |
| 450 | echo -e "${BLUE}╔════════════════════════════════════════╗${NC}" |
| 451 | echo -e "${BLUE}║ Proxmox LXC Monitoring Stack ║${NC}" |
| 452 | echo -e "${BLUE}╚════════════════════════════════════════╝${NC}\n" |
| 453 | |
| 454 | echo -e "${YELLOW}🌐 Достъп до услугите:${NC}" |
| 455 | echo -e " Prometheus: http://${SERVER_IP}:9090" |
| 456 | echo -e " Alertmanager: http://${SERVER_IP}:9093" |
| 457 | echo -e " Grafana: http://${SERVER_IP}:3000" |
| 458 | echo -e " └─ User: ${GREEN}admin${NC}" |
| 459 | echo -e " └─ Pass: ${GREEN}admin${NC}" |
| 460 | echo -e " Node Exporter: http://${SERVER_IP}:9100/metrics" |
| 461 | echo -e " Blackbox Export: http://${SERVER_IP}:9115" |
| 462 | |
| 463 | echo -e "\n${YELLOW}📁 Конфигурационни файлове:${NC}" |
| 464 | echo -e " Prometheus: /etc/prometheus/prometheus.yml" |
| 465 | echo -e " Alerts: /etc/prometheus/alerts.yml" |
| 466 | echo -e " Alertmanager: /etc/alertmanager/alertmanager.yml" |
| 467 | |
| 468 | echo -e "\n${YELLOW}📋 Следващи стъпки:${NC}" |
| 469 | echo -e " 1. Инсталирай Node Exporter в другите LXC контейнери" |
| 470 | echo -e " 2. Добави IP адресите им в: ${GREEN}/etc/prometheus/prometheus.yml${NC}" |
| 471 | echo -e " 3. Рестартирай Prometheus: ${GREEN}systemctl restart prometheus${NC}" |
| 472 | |
| 473 | echo -e "\n${GREEN}🎉 Тестов alert ще бъде изпратен в Telegram след 5-10 секунди!${NC}" |
| 474 | echo -e "${YELLOW}📱 Telegram Chat ID: ${TELEGRAM_CHAT_ID}${NC}\n" |
| 475 | |
| 476 | # Създаване на helper скриптове |
| 477 | cat > /usr/local/bin/add-lxc-container <<'ADDSCRIPT' |
| 478 | #!/bin/bash |
| 479 | if [ $# -ne 2 ]; then |
| 480 | echo "Usage: add-lxc-container <IP> <Alias>" |
| 481 | echo "Example: add-lxc-container 10.0.0.101 'Web Server'" |
| 482 | exit 1 |
| 483 | fi |
| 484 | |
| 485 | IP=$1 |
| 486 | ALIAS=$2 |
| 487 | |
| 488 | # Добавяне в Prometheus config преди последния празен targets |
| 489 | sed -i "/- targets: \[\]/i\\ - targets: ['${IP}:9100']\n labels:\n alias: '${ALIAS}'\n type: 'lxc-container'" /etc/prometheus/prometheus.yml |
| 490 | |
| 491 | systemctl restart prometheus |
| 492 | echo "✓ Добавен контейнер: $ALIAS ($IP)" |
| 493 | echo "Провери в Prometheus: http://localhost:9090/targets" |
| 494 | ADDSCRIPT |
| 495 | |
| 496 | chmod +x /usr/local/bin/add-lxc-container |
| 497 | |
| 498 | echo -e "${GREEN}✓ Създаден helper скрипт: ${YELLOW}add-lxc-container${NC}" |
| 499 | echo -e " Използвай: ${GREEN}add-lxc-container 10.0.0.101 'Web Server'${NC}\n" |
| 500 | |
| 501 | if [ "$ALL_OK" = true ]; then |
| 502 | exit 0 |
| 503 | else |
| 504 | echo -e "${RED}Някои сервизи не стартираха правилно. Проверете логовете.${NC}" |
| 505 | exit 1 |
| 506 | fi |
| 507 |