Ostatnio aktywny 1 month ago

monitoring.sh Surowy
#!/bin/bash
#
# Interactive installer for a Prometheus / Alertmanager / Grafana monitoring
# stack inside a Proxmox LXC container. Must be run as root; prompts for the
# Telegram bot token and chat ID that Alertmanager will use for notifications.

# ANSI colour escape sequences for the status output (rendered by `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${GREEN}================================${NC}"
echo -e "${GREEN}Proxmox LXC Monitoring Stack${NC}"
echo -e "${GREEN}================================${NC}\n"

# The installer writes to /etc and /usr/local/bin and manages systemd units,
# so refuse to continue without root privileges.
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}Този скрипт трябва да се изпълни като root!${NC}" >&2
  exit 1
fi

# Ask for the Telegram credentials.
echo -e "${YELLOW}Моля въведете Telegram Bot Token:${NC}"
read -r TELEGRAM_TOKEN

echo -e "${YELLOW}Моля въведете Telegram Chat ID (БЕЗ кавички):${NC}"
read -r TELEGRAM_CHAT_ID

# Both values are mandatory.
if [[ -z "$TELEGRAM_TOKEN" || -z "$TELEGRAM_CHAT_ID" ]]; then
  echo -e "${RED}Telegram данните са задължителни!${NC}" >&2
  exit 1
fi

# Both values are interpolated verbatim into a YAML file later in this script,
# so reject anything that does not look like "<digits>:<token chars>" or an
# integer chat ID; a stray quote or space would corrupt the generated config.
if [[ ! "$TELEGRAM_TOKEN" =~ ^[0-9]+:[A-Za-z0-9_-]+$ ]]; then
  echo -e "${RED}Невалиден формат на Telegram Bot Token!${NC}" >&2
  exit 1
fi

if [[ ! "$TELEGRAM_CHAT_ID" =~ ^-?[0-9]+$ ]]; then
  echo -e "${RED}Telegram Chat ID трябва да е цяло число!${NC}" >&2
  exit 1
fi

echo -e "\n${GREEN}Започва инсталацията в LXC контейнера...${NC}\n"
34
# Refresh the package index and install the tools the rest of the script uses.
echo -e "${YELLOW}[1/9] Актуализация на системата...${NC}"
apt-get update -qq
if ! apt-get install -y wget curl tar net-tools > /dev/null 2>&1; then
  # Without wget/tar every later download step would fail, so stop here.
  echo -e "${RED}Неуспешна инсталация на базовите пакети!${NC}" >&2
  exit 1
fi

# Create the unprivileged accounts the services run as. An existing account is
# left alone; any other useradd failure aborts instead of being hidden.
echo -e "${YELLOW}[2/9] Създаване на системни потребители...${NC}"
for svc_user in prometheus node_exporter alertmanager; do
  if id -u "$svc_user" > /dev/null 2>&1; then
    continue
  fi
  if ! useradd --no-create-home --shell /bin/false "$svc_user"; then
    echo -e "${RED}Неуспешно създаване на потребител ${svc_user}!${NC}" >&2
    exit 1
  fi
done
45
# Download the Prometheus release tarball into a private temp directory,
# install the binaries, and create the config and data directories.
echo -e "${YELLOW}[3/9] Инсталация на Prometheus...${NC}"
PROM_VERSION="2.47.0"
PROM_PKG="prometheus-${PROM_VERSION}.linux-amd64"
PROM_TMP=$(mktemp -d) || exit 1

if ! wget -q -P "$PROM_TMP" "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/${PROM_PKG}.tar.gz" \
  || ! tar xzf "${PROM_TMP}/${PROM_PKG}.tar.gz" -C "$PROM_TMP"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Prometheus!${NC}" >&2
  rm -rf -- "$PROM_TMP"
  exit 1
fi

install -o prometheus -g prometheus -m 0755 \
  "${PROM_TMP}/${PROM_PKG}/prometheus" \
  "${PROM_TMP}/${PROM_PKG}/promtool" \
  /usr/local/bin/

mkdir -p /etc/prometheus /var/lib/prometheus

# The systemd unit written later in this script passes --web.console.templates
# and --web.console.libraries pointing into /etc/prometheus, so ship those
# directories when the tarball contains them.
for console_dir in consoles console_libraries; do
  if [[ -d "${PROM_TMP}/${PROM_PKG}/${console_dir}" ]]; then
    cp -r "${PROM_TMP}/${PROM_PKG}/${console_dir}" /etc/prometheus/
  fi
done

chown -R prometheus:prometheus /etc/prometheus /var/lib/prometheus
rm -rf -- "$PROM_TMP"
58
# Write the Prometheus configuration: global intervals, the local Alertmanager,
# the alert rule file, and four scrape jobs (Prometheus itself, this
# container's Node Exporter, a placeholder job for other LXC containers, and
# HTTP probes relayed through the Blackbox Exporter on localhost:9115).
# The heredoc delimiter is quoted so nothing inside is expanded by the shell.
cat > /etc/prometheus/prometheus.yml <<'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: 'proxmox-lxc-monitor'

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - localhost:9093

rule_files:
  - "/etc/prometheus/alerts.yml"

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
        labels:
          alias: 'Prometheus Server'
          type: 'monitoring'

  - job_name: 'monitoring-container'
    static_configs:
      - targets: ['localhost:9100']
        labels:
          alias: 'Monitoring LXC'
          type: 'lxc-container'

  - job_name: 'lxc-containers'
    static_configs:
      - targets: []
      # Добави тук IP адресите на другите контейнери:
      # - targets: ['10.0.0.101:9100']
      #   labels:
      #     alias: 'Web Server'
      #     type: 'lxc-container'
      # - targets: ['10.0.0.102:9100']
      #   labels:
      #     alias: 'Database Server'
      #     type: 'lxc-container'

  - job_name: 'blackbox'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          - http://localhost:9090
          - https://www.google.com
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9115
EOF
120
# Write the alerting rules: one always-firing test alert (used to confirm that
# the Telegram pipeline works) plus container-down, CPU, memory, disk, load and
# network-error alerts. The heredoc delimiter is quoted, so the Go-template
# placeholders ({{ $labels.* }}, {{ $value }}) reach the file unchanged without
# needing backslash escapes.
cat > /etc/prometheus/alerts.yml <<'EOF'
groups:
  - name: test_alerts
    interval: 10s
    rules:
      - alert: TestAlert
        expr: up{job="prometheus"} == 1
        for: 5s
        labels:
          severity: info
        annotations:
          summary: "🎉 Proxmox мониторингът е активен!"
          description: "Prometheus е конфигуриран успешно и следи LXC контейнерите."

  - name: lxc_container_alerts
    interval: 30s
    rules:
      - alert: LXCContainerDown
        expr: up{type="lxc-container"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "⚠️ LXC контейнер {{ $labels.alias }} е DOWN"
          description: "Контейнер {{ $labels.instance }} не отговаря повече от 1 минута."

      - alert: HighCPUUsage
        expr: 100 - (avg by(instance, alias) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "🔥 Високо CPU натоварване"
          description: "{{ $labels.alias }} ({{ $labels.instance }}) използва {{ $value | humanize }}% CPU"

      - alert: HighMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "💾 Високо RAM използване"
          description: "{{ $labels.alias }} ({{ $labels.instance }}) използва {{ $value | humanize }}% RAM"

      - alert: DiskSpaceLow
        expr: (node_filesystem_avail_bytes{fstype!="tmpfs",mountpoint="/"} / node_filesystem_size_bytes{fstype!="tmpfs",mountpoint="/"}) * 100 < 20
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "💿 Малко дисково пространство"
          description: "{{ $labels.alias }} ({{ $labels.instance }}) има само {{ $value | humanize }}% свободно място"

      - alert: HighLoadAverage
        expr: node_load5 / count(node_cpu_seconds_total{mode="idle"}) without (cpu, mode) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "📊 Високо системно натоварване"
          description: "{{ $labels.alias }} има Load Average: {{ $value | humanize }}"

      - alert: NetworkErrors
        expr: rate(node_network_receive_errs_total[5m]) > 10 or rate(node_network_transmit_errs_total[5m]) > 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "🌐 Мрежови грешки"
          description: "{{ $labels.alias }} има {{ $value | humanize }} грешки/сек на интерфейс {{ $labels.device }}"
EOF

# Everything under /etc/prometheus must be readable by the service account.
chown -R prometheus:prometheus /etc/prometheus
195
# systemd unit for Prometheus. The heredoc delimiter is quoted so the line
# continuations and $MAINPID are written literally. ExecReload sends SIGHUP,
# which lets `systemctl reload prometheus` re-read the configuration without
# a full restart.
cat > /etc/systemd/system/prometheus.service <<'EOF'
[Unit]
Description=Prometheus Monitoring for Proxmox LXC
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/bin/prometheus \
    --config.file=/etc/prometheus/prometheus.yml \
    --storage.tsdb.path=/var/lib/prometheus/ \
    --web.console.templates=/etc/prometheus/consoles \
    --web.console.libraries=/etc/prometheus/console_libraries \
    --storage.tsdb.retention.time=30d
ExecReload=/bin/kill -HUP $MAINPID

[Install]
WantedBy=multi-user.target
EOF
217
# Install Node Exporter for this container: download into a private temp
# directory, install the binary, and write its systemd unit.
echo -e "${YELLOW}[4/9] Инсталация на Node Exporter...${NC}"
NODE_VERSION="1.6.1"
NODE_PKG="node_exporter-${NODE_VERSION}.linux-amd64"
NODE_TMP=$(mktemp -d) || exit 1

if ! wget -q -P "$NODE_TMP" "https://github.com/prometheus/node_exporter/releases/download/v${NODE_VERSION}/${NODE_PKG}.tar.gz" \
  || ! tar xzf "${NODE_TMP}/${NODE_PKG}.tar.gz" -C "$NODE_TMP"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Node Exporter!${NC}" >&2
  rm -rf -- "$NODE_TMP"
  exit 1
fi

install -o node_exporter -g node_exporter -m 0755 \
  "${NODE_TMP}/${NODE_PKG}/node_exporter" /usr/local/bin/
rm -rf -- "$NODE_TMP"

cat > /etc/systemd/system/node_exporter.service <<'EOF'
[Unit]
Description=Node Exporter for LXC Container
Wants=network-online.target
After=network-online.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/usr/local/bin/node_exporter

[Install]
WantedBy=multi-user.target
EOF
242
# Install the Blackbox Exporter: download into a private temp directory,
# install the binary, then write its probe modules and systemd unit.
echo -e "${YELLOW}[5/9] Инсталация на Blackbox Exporter...${NC}"
BLACKBOX_VERSION="0.24.0"
BLACKBOX_PKG="blackbox_exporter-${BLACKBOX_VERSION}.linux-amd64"
BLACKBOX_TMP=$(mktemp -d) || exit 1

if ! wget -q -P "$BLACKBOX_TMP" "https://github.com/prometheus/blackbox_exporter/releases/download/v${BLACKBOX_VERSION}/${BLACKBOX_PKG}.tar.gz" \
  || ! tar xzf "${BLACKBOX_TMP}/${BLACKBOX_PKG}.tar.gz" -C "$BLACKBOX_TMP"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Blackbox Exporter!${NC}" >&2
  rm -rf -- "$BLACKBOX_TMP"
  exit 1
fi

# The exporter runs under the prometheus account (see the unit below).
install -o prometheus -g prometheus -m 0755 \
  "${BLACKBOX_TMP}/${BLACKBOX_PKG}/blackbox_exporter" /usr/local/bin/
rm -rf -- "$BLACKBOX_TMP"

# Probe modules: HTTP GET (following redirects), plain TCP connect, and ICMP.
mkdir -p /etc/blackbox_exporter
cat > /etc/blackbox_exporter/config.yml <<'EOF'
modules:
  http_2xx:
    prober: http
    timeout: 5s
    http:
      valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
      valid_status_codes: []
      method: GET
      follow_redirects: true

  tcp_connect:
    prober: tcp
    timeout: 5s

  icmp:
    prober: icmp
    timeout: 5s
EOF

cat > /etc/systemd/system/blackbox_exporter.service <<'EOF'
[Unit]
Description=Blackbox Exporter
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/bin/blackbox_exporter --config.file=/etc/blackbox_exporter/config.yml

[Install]
WantedBy=multi-user.target
EOF
288
# Install Alertmanager: download into a private temp directory, install the
# binaries, write the Telegram-enabled configuration and the systemd unit.
echo -e "${YELLOW}[6/9] Инсталация на Alertmanager...${NC}"
ALERT_VERSION="0.26.0"
ALERT_PKG="alertmanager-${ALERT_VERSION}.linux-amd64"
ALERT_TMP=$(mktemp -d) || exit 1

if ! wget -q -P "$ALERT_TMP" "https://github.com/prometheus/alertmanager/releases/download/v${ALERT_VERSION}/${ALERT_PKG}.tar.gz" \
  || ! tar xzf "${ALERT_TMP}/${ALERT_PKG}.tar.gz" -C "$ALERT_TMP"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Alertmanager!${NC}" >&2
  rm -rf -- "$ALERT_TMP"
  exit 1
fi

install -o alertmanager -g alertmanager -m 0755 \
  "${ALERT_TMP}/${ALERT_PKG}/alertmanager" \
  "${ALERT_TMP}/${ALERT_PKG}/amtool" \
  /usr/local/bin/
rm -rf -- "$ALERT_TMP"

mkdir -p /etc/alertmanager /var/lib/alertmanager
chown -R alertmanager:alertmanager /etc/alertmanager /var/lib/alertmanager

# The configuration embeds the Telegram bot token, so create the file with
# restrictive permissions *before* writing the secret into it (`cat >` keeps
# the mode of an existing file).
install -o alertmanager -g alertmanager -m 0640 /dev/null /etc/alertmanager/alertmanager.yml

# Alertmanager configuration with a single Telegram receiver. This heredoc is
# intentionally unquoted: ${TELEGRAM_TOKEN} and ${TELEGRAM_CHAT_ID} are expanded
# by the shell, while the {{ ... }} template syntax contains no '$' and is
# written through unchanged.
cat > /etc/alertmanager/alertmanager.yml <<EOF
global:
  resolve_timeout: 5m

route:
  group_by: ['alertname', 'instance', 'alias']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 12h
  receiver: 'telegram'

receivers:
  - name: 'telegram'
    telegram_configs:
      - bot_token: '${TELEGRAM_TOKEN}'
        chat_id: ${TELEGRAM_CHAT_ID}
        parse_mode: 'HTML'
        message: |
          <b>{{ .Status | toUpper }}</b> - Proxmox LXC Monitor
          {{ range .Alerts }}
          <b>🏷 Alert:</b> {{ .Labels.alertname }}
          <b>📍 Container:</b> {{ .Labels.alias }} ({{ .Labels.instance }})
          <b>🔴 Severity:</b> {{ .Labels.severity }}
          <b>📝 Summary:</b> {{ .Annotations.summary }}
          <b>ℹ️ Details:</b> {{ .Annotations.description }}
          <b>⏰ Started:</b> {{ .StartsAt.Format "02.01.2006 15:04:05" }}
          {{ end }}

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'instance']
EOF

chown -R alertmanager:alertmanager /etc/alertmanager

# systemd unit; quoted delimiter so the line continuations are literal.
cat > /etc/systemd/system/alertmanager.service <<'EOF'
[Unit]
Description=Alertmanager for Proxmox LXC
Wants=network-online.target
After=network-online.target

[Service]
User=alertmanager
Group=alertmanager
Type=simple
ExecStart=/usr/local/bin/alertmanager \
    --config.file=/etc/alertmanager/alertmanager.yml \
    --storage.path=/var/lib/alertmanager/

[Install]
WantedBy=multi-user.target
EOF
358
# Install Grafana from the vendor APT repository, then provision the
# Prometheus datasource and a file-based dashboard provider.
echo -e "${YELLOW}[7/9] Инсталация на Grafana...${NC}"
apt-get install -y apt-transport-https software-properties-common gnupg > /dev/null 2>&1

# The repository key is published ASCII-armored; convert it to a binary
# keyring so the `signed-by=` option below can use it.
mkdir -p /usr/share/keyrings
wget -q -O - https://apt.grafana.com/gpg.key \
  | gpg --batch --yes --dearmor -o /usr/share/keyrings/grafana.gpg
if [[ ! -s /usr/share/keyrings/grafana.gpg ]]; then
  echo -e "${RED}Неуспешно изтегляне на GPG ключа на Grafana!${NC}" >&2
  exit 1
fi
chmod 644 /usr/share/keyrings/grafana.gpg

echo "deb [signed-by=/usr/share/keyrings/grafana.gpg] https://apt.grafana.com stable main" \
  > /etc/apt/sources.list.d/grafana.list
apt-get update -qq
if ! apt-get install -y grafana > /dev/null 2>&1; then
  # The rest of this section chowns to the grafana user, which only exists
  # once the package is installed.
  echo -e "${RED}Неуспешна инсталация на Grafana!${NC}" >&2
  exit 1
fi

# Datasource provisioning: the local Prometheus as the default datasource.
mkdir -p /etc/grafana/provisioning/datasources
cat > /etc/grafana/provisioning/datasources/prometheus.yml <<'EOF'
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://localhost:9090
    isDefault: true
    editable: true
    jsonData:
      timeInterval: "15s"
EOF

# Dashboard provisioning: load every JSON file from /var/lib/grafana/dashboards.
mkdir -p /etc/grafana/provisioning/dashboards
cat > /etc/grafana/provisioning/dashboards/default.yml <<'EOF'
apiVersion: 1

providers:
  - name: 'Proxmox LXC'
    orgId: 1
    folder: 'LXC Containers'
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /var/lib/grafana/dashboards
EOF

mkdir -p /var/lib/grafana/dashboards

# Fetch the community dashboards. Each entry is "<file name>:<grafana.com id>".
echo -e "${YELLOW}[8/9] Изтегляне на Grafana dashboards...${NC}"
for dash_spec in "node-exporter:1860" "prometheus-stats:2"; do
  dash_file="/var/lib/grafana/dashboards/${dash_spec%%:*}.json"
  dash_url="https://grafana.com/api/dashboards/${dash_spec##*:}/revisions/latest/download"
  if wget -q -O "$dash_file" "$dash_url"; then
    # Exported dashboards may reference the import-time placeholder
    # ${DS_PROMETHEUS}; file provisioning never substitutes it, so point it at
    # the datasource provisioned above. No-op when the placeholder is absent.
    sed -i 's/\${DS_PROMETHEUS}/Prometheus/g' "$dash_file"
  else
    # A failed `wget -O` leaves an empty file behind; remove it so the
    # provider does not try to load invalid JSON.
    echo -e "${RED}Неуспешно изтегляне на dashboard: ${dash_spec%%:*}${NC}" >&2
    rm -f -- "$dash_file"
  fi
done

chown -R grafana:grafana /var/lib/grafana/dashboards
408
# Enable and start every service, then report which ones are active.
echo -e "${YELLOW}[9/9] Стартиране на сервизите...${NC}"
systemctl daemon-reload

systemctl enable prometheus node_exporter blackbox_exporter alertmanager grafana-server > /dev/null 2>&1

# Exporters first, then Prometheus, Alertmanager and Grafana.
for unit in node_exporter blackbox_exporter prometheus alertmanager grafana-server; do
  systemctl start "$unit"
done

# Give the daemons a moment to come up before checking them.
sleep 5

# Status check; ALL_OK records whether any service failed to start.
echo -e "\n${BLUE}Проверка на статуса на сервизите...${NC}\n"
SERVICES=("prometheus" "node_exporter" "blackbox_exporter" "alertmanager" "grafana-server")
ALL_OK=true

for service in "${SERVICES[@]}"; do
  if systemctl is-active --quiet "$service"; then
    echo -e "${GREEN}✓${NC} $service: Running"
  else
    echo -e "${RED}✗${NC} $service: Failed"
    ALL_OK=false
  fi
done

# Ask Prometheus to re-read its configuration. The HTTP endpoint /-/reload is
# only served when Prometheus runs with --web.enable-lifecycle, so send SIGHUP
# through systemd instead. Best-effort: a failure here (e.g. Prometheus not
# running) is already reflected in ALL_OK above.
echo -e "\n${YELLOW}Изпращане на тестов alert...${NC}"
sleep 3
systemctl kill --signal=HUP prometheus > /dev/null 2>&1
442
# Final summary. The success banner is shown only when every service came up;
# ALL_OK defaults to true if the status check did not set it.
if [[ "${ALL_OK:-true}" == true ]]; then
  echo -e "\n${GREEN}================================${NC}"
  echo -e "${GREEN}Инсталацията завърши успешно!${NC}"
  echo -e "${GREEN}================================${NC}\n"
else
  echo -e "\n${RED}================================${NC}"
  echo -e "${RED}Инсталацията завърши с грешки!${NC}"
  echo -e "${RED}================================${NC}\n"
fi

# First address reported by `hostname -I`, used to build the URLs below.
SERVER_IP=$(hostname -I | awk '{print $1}')

# The title is padded to the 40-column inner width of the box.
echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║      Proxmox LXC Monitoring Stack      ║${NC}"
echo -e "${BLUE}╚════════════════════════════════════════╝${NC}\n"

echo -e "${YELLOW}🌐 Достъп до услугите:${NC}"
echo -e " Prometheus: http://${SERVER_IP}:9090"
echo -e " Alertmanager: http://${SERVER_IP}:9093"
echo -e " Grafana: http://${SERVER_IP}:3000"
echo -e " └─ User: ${GREEN}admin${NC}"
echo -e " └─ Pass: ${GREEN}admin${NC}"
echo -e " Node Exporter: http://${SERVER_IP}:9100/metrics"
echo -e " Blackbox Export: http://${SERVER_IP}:9115"

echo -e "\n${YELLOW}📁 Конфигурационни файлове:${NC}"
echo -e " Prometheus: /etc/prometheus/prometheus.yml"
echo -e " Alerts: /etc/prometheus/alerts.yml"
echo -e " Alertmanager: /etc/alertmanager/alertmanager.yml"

echo -e "\n${YELLOW}📋 Следващи стъпки:${NC}"
echo -e " 1. Инсталирай Node Exporter в другите LXC контейнери"
echo -e " 2. Добави IP адресите им в: ${GREEN}/etc/prometheus/prometheus.yml${NC}"
echo -e " 3. Рестартирай Prometheus: ${GREEN}systemctl restart prometheus${NC}"

echo -e "\n${GREEN}🎉 Тестов alert ще бъде изпратен в Telegram след 5-10 секунди!${NC}"
echo -e "${YELLOW}📱 Telegram Chat ID: ${TELEGRAM_CHAT_ID}${NC}\n"
475
# Generate the helper /usr/local/bin/add-lxc-container. The heredoc delimiter
# is quoted, so the helper's own variables are written literally and only
# evaluated when the helper runs.
cat > /usr/local/bin/add-lxc-container <<'ADDSCRIPT'
#!/bin/bash
# Register another container's Node Exporter (port 9100) as a Prometheus
# target by inserting a static_configs entry in front of the empty
# "- targets: []" placeholder, then restart Prometheus.
if [ $# -ne 2 ]; then
  echo "Usage: add-lxc-container <IP> <Alias>"
  echo "Example: add-lxc-container 10.0.0.101 'Web Server'"
  exit 1
fi

IP=$1
ALIAS=$2
CONFIG=/etc/prometheus/prometheus.yml

# The address ends up inside a quoted YAML string; accept only characters that
# can appear in an IPv4 address or host name.
if [[ ! "$IP" =~ ^[A-Za-z0-9][A-Za-z0-9.-]*$ ]]; then
  echo "Error: invalid address: $IP" >&2
  exit 1
fi

if ! grep -q -- '- targets: \[\]' "$CONFIG"; then
  echo "Error: placeholder '- targets: []' not found in $CONFIG" >&2
  exit 1
fi

TMP=$(mktemp) || exit 1
trap 'rm -f -- "$TMP"' EXIT

# Inside a single-quoted YAML scalar a quote is escaped by doubling it.
ALIAS_YAML=${ALIAS//\'/\'\'}

# Insert the new entry before the first placeholder line, reusing that line's
# own indentation. Values travel through the environment so awk does not
# interpret backslashes in them; \047 is a single quote.
TARGET_HOST="$IP" TARGET_ALIAS="$ALIAS_YAML" awk '
  !inserted && /^[ \t]*- targets: \[\][ \t]*$/ {
    indent = $0
    sub(/- targets:.*$/, "", indent)
    printf "%s- targets: [\047%s:9100\047]\n", indent, ENVIRON["TARGET_HOST"]
    printf "%s  labels:\n", indent
    printf "%s    alias: \047%s\047\n", indent, ENVIRON["TARGET_ALIAS"]
    printf "%s    type: \047lxc-container\047\n", indent
    inserted = 1
  }
  { print }
' "$CONFIG" > "$TMP" || exit 1

# Refuse to install a configuration Prometheus would reject on restart.
if command -v promtool > /dev/null 2>&1 \
  && ! promtool check config "$TMP" > /dev/null 2>&1; then
  echo "Error: new configuration failed 'promtool check config'; $CONFIG left unchanged" >&2
  exit 1
fi

# Overwrite in place (instead of mv) so the file keeps its owner and mode.
cat "$TMP" > "$CONFIG"

systemctl restart prometheus
echo "✓ Добавен контейнер: $ALIAS ($IP)"
echo "Провери в Prometheus: http://localhost:9090/targets"
ADDSCRIPT

chmod +x /usr/local/bin/add-lxc-container

echo -e "${GREEN}✓ Създаден helper скрипт: ${YELLOW}add-lxc-container${NC}"
echo -e " Използвай: ${GREEN}add-lxc-container 10.0.0.101 'Web Server'${NC}\n"
500
# Exit status mirrors the service check: 0 only when ALL_OK is "true".
if [[ "$ALL_OK" == true ]]; then
  exit 0
fi

echo -e "${RED}Някои сервизи не стартираха правилно. Проверете логовете.${NC}" >&2
exit 1
507