Последняя активность 1 month ago

urocibg ревизий этого фрагмента 1 month ago. К ревизии

1 file changed, 506 insertions

monitoring.sh(файл создан)

@@ -0,0 +1,506 @@
#!/bin/bash

# ANSI colour escape sequences used for all terminal output of the installer.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # reset / no colour

# Installer banner; the last line carries one extra trailing newline.
printf '%b\n' \
  "${GREEN}================================${NC}" \
  "${GREEN}Proxmox LXC Monitoring Stack${NC}" \
  "${GREEN}================================${NC}\n"

# The steps that follow install packages and write under /etc and /usr/local,
# so refuse to continue unless the effective user is root.
if (( EUID != 0 )); then
  printf '%b\n' "${RED}Този скрипт трябва да се изпълни като root!${NC}"
  exit 1
fi
19 +
# Ask for the Telegram credentials. Both values are later interpolated
# verbatim into the Alertmanager YAML, so they are validated here instead of
# letting a typo produce a broken (or injected) configuration file.
echo -e "${YELLOW}Моля въведете Telegram Bot Token:${NC}"
read -r TELEGRAM_TOKEN

echo -e "${YELLOW}Моля въведете Telegram Chat ID (БЕЗ кавички):${NC}"
read -r TELEGRAM_CHAT_ID

# Pasted values often carry a stray space or carriage return; drop them.
TELEGRAM_TOKEN=${TELEGRAM_TOKEN//[[:space:]]/}
TELEGRAM_CHAT_ID=${TELEGRAM_CHAT_ID//[[:space:]]/}

# Both values are mandatory.
if [[ -z "$TELEGRAM_TOKEN" || -z "$TELEGRAM_CHAT_ID" ]]; then
  echo -e "${RED}Telegram данните са задължителни!${NC}"
  exit 1
fi

# A bot token has the shape "<numeric bot id>:<secret>".
if [[ ! "$TELEGRAM_TOKEN" =~ ^[0-9]+:[A-Za-z0-9_-]+$ ]]; then
  echo -e "${RED}Невалиден Telegram Bot Token (очакван формат: 123456789:ABC-DEF...)!${NC}" >&2
  exit 1
fi

# chat_id is written unquoted into the YAML and must be an integer
# (group and channel ids are negative).
if [[ ! "$TELEGRAM_CHAT_ID" =~ ^-?[0-9]+$ ]]; then
  echo -e "${RED}Telegram Chat ID трябва да е цяло число (напр. 123456789 или -1001234567890)!${NC}" >&2
  exit 1
fi
32 +
echo -e "\n${GREEN}Започва инсталацията в LXC контейнера...${NC}\n"

# Step 1: refresh the package index and install the tools the remaining
# steps rely on. A failed install is fatal: without wget/tar nothing below
# can work, so do not hide the error.
echo -e "${YELLOW}[1/9] Актуализация на системата...${NC}"
apt-get update -qq
if ! DEBIAN_FRONTEND=noninteractive apt-get install -y wget curl tar net-tools > /dev/null; then
  echo -e "${RED}Неуспешна инсталация на необходимите пакети (wget, curl, tar, net-tools)!${NC}" >&2
  exit 1
fi

# Step 2: dedicated login-less accounts for the daemons. Existing accounts
# are left alone (the script can be re-run); a genuine useradd failure is
# reported instead of being swallowed.
echo -e "${YELLOW}[2/9] Създаване на системни потребители...${NC}"
for svc_user in prometheus node_exporter alertmanager; do
  if id -u "$svc_user" > /dev/null 2>&1; then
    continue
  fi
  if ! useradd --no-create-home --shell /bin/false "$svc_user"; then
    echo -e "${RED}Неуспешно създаване на потребител ${svc_user}!${NC}" >&2
    exit 1
  fi
done
45 +
# Step 3: install the Prometheus server binaries and create its directories.
echo -e "${YELLOW}[3/9] Инсталация на Prometheus...${NC}"
PROM_VERSION="2.47.0"
prom_pkg="prometheus-${PROM_VERSION}.linux-amd64"

# Work in a private temporary directory instead of whatever the current
# directory happens to be, and stop if the download or unpacking fails.
prom_tmp=$(mktemp -d) || {
  echo -e "${RED}Неуспешно създаване на временна директория!${NC}" >&2
  exit 1
}
if ! wget -q -P "$prom_tmp" \
    "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/${prom_pkg}.tar.gz" \
  || ! tar xzf "${prom_tmp}/${prom_pkg}.tar.gz" -C "$prom_tmp"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Prometheus ${PROM_VERSION}!${NC}" >&2
  rm -rf -- "$prom_tmp"
  exit 1
fi

# Binaries stay root-owned: the service account only needs to execute them,
# not to be able to replace them.
install -m 0755 -o root -g root \
  "${prom_tmp}/${prom_pkg}/prometheus" \
  "${prom_tmp}/${prom_pkg}/promtool" \
  /usr/local/bin/

mkdir -p /etc/prometheus /var/lib/prometheus

# The systemd unit points --web.console.templates / --web.console.libraries
# at these directories, so ship them from the release archive.
cp -r "${prom_tmp}/${prom_pkg}/consoles" \
      "${prom_tmp}/${prom_pkg}/console_libraries" \
      /etc/prometheus/

chown -R prometheus:prometheus /etc/prometheus /var/lib/prometheus
rm -rf -- "$prom_tmp"
58 +
# Main Prometheus configuration: scrapes itself, the local node_exporter,
# an (initially empty) list of other LXC containers and two HTTP probes via
# the blackbox exporter. The delimiter is quoted so nothing in the YAML is
# ever subject to shell expansion. The empty "- targets: []" entry in the
# 'lxc-containers' job is the insertion marker used by add-lxc-container.
cat > /etc/prometheus/prometheus.yml <<'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: 'proxmox-lxc-monitor'

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - localhost:9093

rule_files:
  - "/etc/prometheus/alerts.yml"

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
        labels:
          alias: 'Prometheus Server'
          type: 'monitoring'

  - job_name: 'monitoring-container'
    static_configs:
      - targets: ['localhost:9100']
        labels:
          alias: 'Monitoring LXC'
          type: 'lxc-container'

  - job_name: 'lxc-containers'
    static_configs:
      - targets: []
      # Добави тук IP адресите на другите контейнери:
      # - targets: ['10.0.0.101:9100']
      #   labels:
      #     alias: 'Web Server'
      #     type: 'lxc-container'
      # - targets: ['10.0.0.102:9100']
      #   labels:
      #     alias: 'Database Server'
      #     type: 'lxc-container'

  - job_name: 'blackbox'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          - http://localhost:9090
          - https://www.google.com
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9115
EOF
120 +
# Alerting rules. The delimiter is quoted, so the {{ $labels.* }} / {{ $value }}
# template references reach the file literally without per-occurrence escaping.
# "TestAlert" fires as soon as Prometheus scrapes itself and serves as the
# end-to-end check of the Telegram notification path.
cat > /etc/prometheus/alerts.yml <<'EOF'
groups:
  - name: test_alerts
    interval: 10s
    rules:
      - alert: TestAlert
        expr: up{job="prometheus"} == 1
        for: 5s
        labels:
          severity: info
        annotations:
          summary: "🎉 Proxmox мониторингът е активен!"
          description: "Prometheus е конфигуриран успешно и следи LXC контейнерите."

  - name: lxc_container_alerts
    interval: 30s
    rules:
      - alert: LXCContainerDown
        expr: up{type="lxc-container"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "⚠️ LXC контейнер {{ $labels.alias }} е DOWN"
          description: "Контейнер {{ $labels.instance }} не отговаря повече от 1 минута."

      - alert: HighCPUUsage
        expr: 100 - (avg by(instance, alias) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "🔥 Високо CPU натоварване"
          description: "{{ $labels.alias }} ({{ $labels.instance }}) използва {{ $value | humanize }}% CPU"

      - alert: HighMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "💾 Високо RAM използване"
          description: "{{ $labels.alias }} ({{ $labels.instance }}) използва {{ $value | humanize }}% RAM"

      - alert: DiskSpaceLow
        expr: (node_filesystem_avail_bytes{fstype!="tmpfs",mountpoint="/"} / node_filesystem_size_bytes{fstype!="tmpfs",mountpoint="/"}) * 100 < 20
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "💿 Малко дисково пространство"
          description: "{{ $labels.alias }} ({{ $labels.instance }}) има само {{ $value | humanize }}% свободно място"

      - alert: HighLoadAverage
        expr: node_load5 / count(node_cpu_seconds_total{mode="idle"}) without (cpu, mode) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "📊 Високо системно натоварване"
          description: "{{ $labels.alias }} има Load Average: {{ $value | humanize }}"

      - alert: NetworkErrors
        expr: rate(node_network_receive_errs_total[5m]) > 10 or rate(node_network_transmit_errs_total[5m]) > 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "🌐 Мрежови грешки"
          description: "{{ $labels.alias }} има {{ $value | humanize }} грешки/сек на интерфейс {{ $labels.device }}"
EOF

chown -R prometheus:prometheus /etc/prometheus

# Fail early with a readable message if the generated configuration (which
# references the rules file above) does not parse, rather than discovering
# it later as a dead service.
if ! /usr/local/bin/promtool check config /etc/prometheus/prometheus.yml > /dev/null; then
  echo -e "${RED}Невалидна Prometheus конфигурация! Проверете /etc/prometheus/.${NC}" >&2
  exit 1
fi
195 +
# systemd unit for Prometheus. The delimiter is quoted, so the line
# continuations and $MAINPID are written exactly as shown.
#  - ExecReload lets "systemctl reload prometheus" re-read the configuration
#    via SIGHUP without enabling the HTTP lifecycle API.
#  - Restart=on-failure brings the server back after a crash.
cat > /etc/systemd/system/prometheus.service <<'EOF'
[Unit]
Description=Prometheus Monitoring for Proxmox LXC
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/bin/prometheus \
    --config.file=/etc/prometheus/prometheus.yml \
    --storage.tsdb.path=/var/lib/prometheus/ \
    --web.console.templates=/etc/prometheus/consoles \
    --web.console.libraries=/etc/prometheus/console_libraries \
    --storage.tsdb.retention.time=30d
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF
217 +
# Step 4: node_exporter for the monitoring container itself.
echo -e "${YELLOW}[4/9] Инсталация на Node Exporter...${NC}"
NODE_VERSION="1.6.1"
node_pkg="node_exporter-${NODE_VERSION}.linux-amd64"

# Download and unpack in a private temporary directory; abort on failure so
# a missing binary is not discovered only when the service refuses to start.
node_tmp=$(mktemp -d) || {
  echo -e "${RED}Неуспешно създаване на временна директория!${NC}" >&2
  exit 1
}
if ! wget -q -P "$node_tmp" \
    "https://github.com/prometheus/node_exporter/releases/download/v${NODE_VERSION}/${node_pkg}.tar.gz" \
  || ! tar xzf "${node_tmp}/${node_pkg}.tar.gz" -C "$node_tmp"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Node Exporter ${NODE_VERSION}!${NC}" >&2
  rm -rf -- "$node_tmp"
  exit 1
fi

# Root-owned binary: the service account only needs execute permission.
install -m 0755 -o root -g root "${node_tmp}/${node_pkg}/node_exporter" /usr/local/bin/
rm -rf -- "$node_tmp"

cat > /etc/systemd/system/node_exporter.service <<'EOF'
[Unit]
Description=Node Exporter for LXC Container
Wants=network-online.target
After=network-online.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/usr/local/bin/node_exporter
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF
242 +
# Step 5: blackbox_exporter (HTTP/TCP/ICMP probing), run as the prometheus
# user and queried by the 'blackbox' scrape job on port 9115.
echo -e "${YELLOW}[5/9] Инсталация на Blackbox Exporter...${NC}"
BLACKBOX_VERSION="0.24.0"
bb_pkg="blackbox_exporter-${BLACKBOX_VERSION}.linux-amd64"

# Download and unpack in a private temporary directory; abort on failure.
bb_tmp=$(mktemp -d) || {
  echo -e "${RED}Неуспешно създаване на временна директория!${NC}" >&2
  exit 1
}
if ! wget -q -P "$bb_tmp" \
    "https://github.com/prometheus/blackbox_exporter/releases/download/v${BLACKBOX_VERSION}/${bb_pkg}.tar.gz" \
  || ! tar xzf "${bb_tmp}/${bb_pkg}.tar.gz" -C "$bb_tmp"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Blackbox Exporter ${BLACKBOX_VERSION}!${NC}" >&2
  rm -rf -- "$bb_tmp"
  exit 1
fi

# Root-owned binary: the service account only needs execute permission.
install -m 0755 -o root -g root "${bb_tmp}/${bb_pkg}/blackbox_exporter" /usr/local/bin/
rm -rf -- "$bb_tmp"

# Probe modules. Only http_2xx is referenced by the Prometheus scrape
# configuration; tcp_connect and icmp are provided for later use.
# NOTE(review): the icmp prober normally needs raw-socket privileges
# (CAP_NET_RAW) which this unit does not grant - confirm before relying on it.
mkdir -p /etc/blackbox_exporter
cat > /etc/blackbox_exporter/config.yml <<'EOF'
modules:
  http_2xx:
    prober: http
    timeout: 5s
    http:
      valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
      valid_status_codes: []
      method: GET
      follow_redirects: true

  tcp_connect:
    prober: tcp
    timeout: 5s

  icmp:
    prober: icmp
    timeout: 5s
EOF

cat > /etc/systemd/system/blackbox_exporter.service <<'EOF'
[Unit]
Description=Blackbox Exporter
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/bin/blackbox_exporter --config.file=/etc/blackbox_exporter/config.yml
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF
288 +
# Step 6: Alertmanager with a single Telegram receiver.
echo -e "${YELLOW}[6/9] Инсталация на Alertmanager...${NC}"
ALERT_VERSION="0.26.0"
am_pkg="alertmanager-${ALERT_VERSION}.linux-amd64"

# Download and unpack in a private temporary directory; abort on failure.
am_tmp=$(mktemp -d) || {
  echo -e "${RED}Неуспешно създаване на временна директория!${NC}" >&2
  exit 1
}
if ! wget -q -P "$am_tmp" \
    "https://github.com/prometheus/alertmanager/releases/download/v${ALERT_VERSION}/${am_pkg}.tar.gz" \
  || ! tar xzf "${am_tmp}/${am_pkg}.tar.gz" -C "$am_tmp"; then
  echo -e "${RED}Неуспешно изтегляне или разархивиране на Alertmanager ${ALERT_VERSION}!${NC}" >&2
  rm -rf -- "$am_tmp"
  exit 1
fi

# Root-owned binaries: the service account only needs execute permission.
install -m 0755 -o root -g root \
  "${am_tmp}/${am_pkg}/alertmanager" \
  "${am_tmp}/${am_pkg}/amtool" \
  /usr/local/bin/
rm -rf -- "$am_tmp"

mkdir -p /etc/alertmanager /var/lib/alertmanager
chown -R alertmanager:alertmanager /etc/alertmanager /var/lib/alertmanager

# The configuration embeds the bot token, so create the file with restrictive
# permissions *before* any secret is written into it.
install -m 0640 -o alertmanager -g alertmanager /dev/null /etc/alertmanager/alertmanager.yml

# The delimiter is intentionally unquoted: ${TELEGRAM_TOKEN} and
# ${TELEGRAM_CHAT_ID} must be expanded by the shell. The Go template
# expressions ({{ ... }}) contain no '$' and pass through untouched.
cat > /etc/alertmanager/alertmanager.yml <<EOF
global:
  resolve_timeout: 5m

route:
  group_by: ['alertname', 'instance', 'alias']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 12h
  receiver: 'telegram'

receivers:
  - name: 'telegram'
    telegram_configs:
      - bot_token: '${TELEGRAM_TOKEN}'
        chat_id: ${TELEGRAM_CHAT_ID}
        parse_mode: 'HTML'
        message: |
          <b>{{ .Status | toUpper }}</b> - Proxmox LXC Monitor
          {{ range .Alerts }}
          <b>🏷 Alert:</b> {{ .Labels.alertname }}
          <b>📍 Container:</b> {{ .Labels.alias }} ({{ .Labels.instance }})
          <b>🔴 Severity:</b> {{ .Labels.severity }}
          <b>📝 Summary:</b> {{ .Annotations.summary }}
          <b>ℹ️ Details:</b> {{ .Annotations.description }}
          <b>⏰ Started:</b> {{ .StartsAt.Format "02.01.2006 15:04:05" }}
          {{ end }}

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'instance']
EOF

chown -R alertmanager:alertmanager /etc/alertmanager

cat > /etc/systemd/system/alertmanager.service <<'EOF'
[Unit]
Description=Alertmanager for Proxmox LXC
Wants=network-online.target
After=network-online.target

[Service]
User=alertmanager
Group=alertmanager
Type=simple
ExecStart=/usr/local/bin/alertmanager \
    --config.file=/etc/alertmanager/alertmanager.yml \
    --storage.path=/var/lib/alertmanager/
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF
358 +
# Step 7: Grafana from the vendor APT repository.
echo -e "${YELLOW}[7/9] Инсталация на Grafana...${NC}"
apt-get install -y apt-transport-https software-properties-common > /dev/null 2>&1

# Without the signing key the repository cannot be verified, so stop here
# rather than fail later with a less obvious apt error.
if ! wget -q -O /usr/share/keyrings/grafana.key https://apt.grafana.com/gpg.key; then
  echo -e "${RED}Неуспешно изтегляне на GPG ключа на Grafana!${NC}" >&2
  exit 1
fi
echo "deb [signed-by=/usr/share/keyrings/grafana.key] https://apt.grafana.com stable main" \
  > /etc/apt/sources.list.d/grafana.list
apt-get update -qq
if ! DEBIAN_FRONTEND=noninteractive apt-get install -y grafana > /dev/null; then
  echo -e "${RED}Неуспешна инсталация на Grafana!${NC}" >&2
  exit 1
fi

# Provision the local Prometheus as the default data source.
mkdir -p /etc/grafana/provisioning/datasources
cat > /etc/grafana/provisioning/datasources/prometheus.yml <<'EOF'
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://localhost:9090
    isDefault: true
    editable: true
    jsonData:
      timeInterval: "15s"
EOF

# Load every dashboard JSON found in /var/lib/grafana/dashboards.
mkdir -p /etc/grafana/provisioning/dashboards
cat > /etc/grafana/provisioning/dashboards/default.yml <<'EOF'
apiVersion: 1

providers:
  - name: 'Proxmox LXC'
    orgId: 1
    folder: 'LXC Containers'
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /var/lib/grafana/dashboards
EOF

mkdir -p /var/lib/grafana/dashboards

# Step 8: fetch community dashboards ("<file name>:<grafana.com id>").
# A failed download is only a warning, but the empty file wget leaves behind
# is removed so the provisioner is not handed invalid JSON.
echo -e "${YELLOW}[8/9] Изтегляне на Grafana dashboards...${NC}"
for dash in "node-exporter:1860" "prometheus-stats:2"; do
  dash_file="/var/lib/grafana/dashboards/${dash%%:*}.json"
  dash_url="https://grafana.com/api/dashboards/${dash##*:}/revisions/latest/download"
  if wget -q -O "$dash_file" "$dash_url"; then
    # NOTE(review): dashboards exported for sharing may reference their data
    # source through a ${DS_PROMETHEUS} import placeholder, which file
    # provisioning is not expected to resolve; point it at the data source
    # provisioned above. The substitution is a no-op when the placeholder
    # is absent.
    sed -i 's/\${DS_PROMETHEUS}/Prometheus/g' "$dash_file"
  else
    echo -e "${RED}Неуспешно изтегляне на dashboard ${dash##*:} (${dash%%:*}).${NC}" >&2
    rm -f -- "$dash_file"
  fi
done

chown -R grafana:grafana /var/lib/grafana/dashboards
408 +
# Step 9: enable and start every service, then verify each one is active.
echo -e "${YELLOW}[9/9] Стартиране на сервизите...${NC}"
systemctl daemon-reload

SERVICES=("prometheus" "node_exporter" "blackbox_exporter" "alertmanager" "grafana-server")
systemctl enable "${SERVICES[@]}" > /dev/null 2>&1

# Exporters first, so Prometheus finds its local targets on the first scrape.
for service in node_exporter blackbox_exporter prometheus alertmanager grafana-server; do
  systemctl start "$service"
done

# Give the daemons a moment to come up before checking them.
sleep 5

# ALL_OK records whether every service is running; it is used further down
# to choose the script's exit status.
echo -e "\n${BLUE}Проверка на статуса на сервизите...${NC}\n"
ALL_OK=true

for service in "${SERVICES[@]}"; do
  if systemctl is-active --quiet "$service"; then
    echo -e "${GREEN}✓${NC} $service: Running"
  else
    echo -e "${RED}✗${NC} $service: Failed"
    ALL_OK=false
  fi
done

# Ask Prometheus to re-read its configuration. The unit does not pass
# --web.enable-lifecycle, so the HTTP /-/reload endpoint is unavailable;
# SIGHUP is the reload mechanism that works regardless of that flag.
# Best effort: a failure here must not abort the rest of the summary.
echo -e "\n${YELLOW}Изпращане на тестов alert...${NC}"
sleep 3
systemctl kill --signal=HUP prometheus 2>/dev/null || true
442 +
# Final summary. The headline reflects the outcome of the service check
# instead of announcing success unconditionally (ALL_OK is set by the status
# loop earlier in the script).
echo -e "\n${GREEN}================================${NC}"
if [[ "$ALL_OK" == true ]]; then
  echo -e "${GREEN}Инсталацията завърши успешно!${NC}"
else
  echo -e "${RED}Инсталацията завърши с грешки!${NC}"
fi
echo -e "${GREEN}================================${NC}\n"

# First address reported by the host; used only to print clickable URLs.
SERVER_IP=$(hostname -I | awk '{print $1}')

echo -e "${BLUE}╔════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ Proxmox LXC Monitoring Stack ║${NC}"
echo -e "${BLUE}╚════════════════════════════════════════╝${NC}\n"

echo -e "${YELLOW}🌐 Достъп до услугите:${NC}"
echo -e " Prometheus: http://${SERVER_IP}:9090"
echo -e " Alertmanager: http://${SERVER_IP}:9093"
echo -e " Grafana: http://${SERVER_IP}:3000"
echo -e " └─ User: ${GREEN}admin${NC}"
echo -e " └─ Pass: ${GREEN}admin${NC}"
echo -e " Node Exporter: http://${SERVER_IP}:9100/metrics"
echo -e " Blackbox Export: http://${SERVER_IP}:9115"

echo -e "\n${YELLOW}📁 Конфигурационни файлове:${NC}"
echo -e " Prometheus: /etc/prometheus/prometheus.yml"
echo -e " Alerts: /etc/prometheus/alerts.yml"
echo -e " Alertmanager: /etc/alertmanager/alertmanager.yml"

echo -e "\n${YELLOW}📋 Следващи стъпки:${NC}"
echo -e " 1. Инсталирай Node Exporter в другите LXC контейнери"
echo -e " 2. Добави IP адресите им в: ${GREEN}/etc/prometheus/prometheus.yml${NC}"
echo -e " 3. Рестартирай Prometheus: ${GREEN}systemctl restart prometheus${NC}"

echo -e "\n${GREEN}🎉 Тестов alert ще бъде изпратен в Telegram след 5-10 секунди!${NC}"
echo -e "${YELLOW}📱 Telegram Chat ID: ${TELEGRAM_CHAT_ID}${NC}\n"
475 +
# Generate the helper "add-lxc-container <IP> <Alias>", which registers one
# more node_exporter target in the 'lxc-containers' job of prometheus.yml.
# The delimiter is quoted, so the helper's text is written out literally.
cat > /usr/local/bin/add-lxc-container <<'ADDSCRIPT'
#!/bin/bash
# Add a node_exporter target (<IP>:9100) with the given alias to the
# Prometheus configuration and restart Prometheus.
#
# The new entry is inserted in front of the "- targets: []" placeholder line
# and inherits that line's indentation, so it lands in the same list no
# matter how the file is indented.

CONFIG=/etc/prometheus/prometheus.yml

if [ $# -ne 2 ]; then
  echo "Usage: add-lxc-container <IP> <Alias>"
  echo "Example: add-lxc-container 10.0.0.101 'Web Server'"
  exit 1
fi

IP=$1
ALIAS=$2

# Both values end up inside a YAML file: accept only address/hostname
# characters for the target and a single-line alias.
if [[ ! "$IP" =~ ^[A-Za-z0-9._-]+$ ]]; then
  echo "Invalid address: $IP" >&2
  exit 1
fi
if [[ -z "$ALIAS" || "$ALIAS" == *$'\n'* ]]; then
  echo "Alias must be a non-empty single line" >&2
  exit 1
fi
if ! grep -q -- '- targets: \[\]' "$CONFIG"; then
  echo "Placeholder '- targets: []' not found in $CONFIG" >&2
  exit 1
fi

TMP_CONFIG=$(mktemp) || exit 1
trap 'rm -f -- "$TMP_CONFIG"' EXIT

# Inside a YAML single-quoted scalar a quote is escaped by doubling it.
SQ="'"
export TARGET_ADDR=$IP
export TARGET_ALIAS=${ALIAS//$SQ/$SQ$SQ}

# Values are handed to awk through the environment so that backslashes in
# the alias are not interpreted as escape sequences. \047 is a single quote.
awk '
  !inserted && /^[ \t]*- targets: \[\]/ {
    pad = $0
    sub(/- targets:.*$/, "", pad)
    printf "%s- targets: [\047%s:9100\047]\n", pad, ENVIRON["TARGET_ADDR"]
    printf "%s  labels:\n", pad
    printf "%s    alias: \047%s\047\n", pad, ENVIRON["TARGET_ALIAS"]
    printf "%s    type: \047lxc-container\047\n", pad
    inserted = 1
  }
  { print }
' "$CONFIG" > "$TMP_CONFIG" || exit 1

# Never restart Prometheus onto a configuration it cannot parse.
if command -v promtool > /dev/null 2>&1 \
  && ! promtool check config "$TMP_CONFIG" > /dev/null; then
  echo "Generated configuration is invalid; $CONFIG left unchanged" >&2
  exit 1
fi

# Overwrite in place (rather than mv) to keep the file's owner and mode.
cat "$TMP_CONFIG" > "$CONFIG" || exit 1

systemctl restart prometheus
echo "✓ Добавен контейнер: $ALIAS ($IP)"
echo "Провери в Prometheus: http://localhost:9090/targets"
ADDSCRIPT

chmod +x /usr/local/bin/add-lxc-container

echo -e "${GREEN}✓ Създаден helper скрипт: ${YELLOW}add-lxc-container${NC}"
echo -e " Използвай: ${GREEN}add-lxc-container 10.0.0.101 'Web Server'${NC}\n"
500 +
# Turn the outcome of the service check into the script's exit status:
# 1 (with a hint to inspect the logs) if any service failed, 0 otherwise.
if [[ "$ALL_OK" != true ]]; then
  echo -e "${RED}Някои сервизи не стартираха правилно. Проверете логовете.${NC}"
  exit 1
fi
exit 0
Новее Позже