开源中间件
开源文化 ThingsBoard 开源中间件 Kubernetes DevOps KubeEdge EdgeX Foundry Node-RED
Documentation > Prometheus > Prometheus安装部署

On this page

Prometheus安装部署

1.部署Prometheus

1.1.修改配置文件

  • 准备工作
1
2
3
4
5
6
7
8
9
# 创建目录
mkdir -p /prometheus/prometheus
mkdir -p /prometheus/prometheus/data
mkdir -p /prometheus/prometheus/rules
chmod 777 -R /prometheus/prometheus/data

# 创建配置文件
/prometheus/prometheus/prometheus.yml
touch prometheus.yml
  • 修改配置文件
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# 修改配置文件
/prometheus/prometheus/prometheus.yml
 
# prometheus.yml:
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 172.17.88.22:9093
   
rule_files:
  - "rules/*.yml"
 
scrape_configs:
  - job_name: prometheus
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: prometheus
     
  - job_name: alertmanager
    scrape_interval: 5s
    static_configs:
      - targets: ['172.17.88.22:9093']
        labels:
          instance: alert
 
  - job_name: centos-1
    static_configs:
      - targets: ['172.17.88.22:9100']
        labels:
          instance: node-1
     
  - job_name: centos-2
    static_configs:
      - targets: ['172.17.88.19:9100']
        labels:
          instance: node-2
 
  - job_name: centos-3
    static_configs:
      - targets: ['172.17.88.18:9100']
        labels:
          instance: node-3
 
  - job_name: docker-1
    static_configs:
      - targets: ['172.17.88.22:9601']
        labels:
          instance: node-1
     
  - job_name: docker-2
    static_configs:
      - targets: ['172.17.88.19:9601']
        labels:
          instance: node-2   
 
  - job_name: docker-3
    static_configs:
      - targets: ['172.17.88.18:9601']
        labels:
          instance: node-3     
     
  - job_name: redis-1
    static_configs:
      - targets: ['172.17.88.22:9121']
        labels:
          instance: redis-01    
     
  - job_name: redis-2
    static_configs:
      - targets: ['172.17.88.19:9121']
        labels:
          instance: redis-02    
     
  - job_name: redis-3
    static_configs:
      - targets: ['172.17.88.18:9121']
        labels:
          instance: redis-03
     
  - job_name: rabbitmq-elk
    static_configs:
      - targets: ['172.17.88.22:9419']
        labels:
          instance: mq-elk
     
  - job_name: rabbitmq-node
    static_configs:
      - targets: ['172.17.88.19:9419']
        labels:
          instance: mq-node
 
  - job_name: postgres-1
    static_configs:
      - targets: ['172.17.88.22:9187']
        labels:
          instance: pg-1
     
  - job_name: postgres-2
    static_configs:
      - targets: ['172.17.88.19:9187']
        labels:
          instance: pg-2    
     
  - job_name: es-cluster
    static_configs:
      - targets: ['172.17.88.22:9114', '172.17.88.19:9114', '172.17.88.18:9114']
        labels:
          instance: es
 
  - job_name: mysql
    static_configs:
      - targets: ['172.17.88.22:9104']
        labels:
          instance: mysqld
     
  - job_name: nginx-1
    static_configs:
      - targets: ['172.17.88.22:9913']
        labels:
          instance: nx-1
     
  - job_name: nginx-3
    static_configs:
      - targets: ['172.17.88.18:9913']
        labels:
          instance: nx-3
 
  - job_name: spring-boot-actuator
    metrics_path: '/actuator/prometheus'
    scrape_interval: 5s
    static_configs:
      - targets: ['172.17.88.22:8888']
        labels:
          instance: spring-actuator
     
  - job_name: haproxy
    scrape_interval: 5s
    static_configs:
      - targets: ['172.17.88.18:9101']
        labels:
          instance: ha1
     
  - job_name: HikariCP
    scrape_interval: 5s
    metrics_path: '/actuator/prometheus'
    static_configs:
      - targets: ['172.17.88.22:28081']
        labels:
          instance: dbPool

1.2.配置告警规则

  • 准备工作
1
2
3
4
5
6
7
8
9
# 创建目录
mkdir -p /prometheus/prometheus/rules
 
# 创建配置文件
/prometheus/prometheus/rules/memory_over.yml
/prometheus/prometheus/rules/node_down.yml
 
touch memory_over.yml
touch node_down.yml
  • 修改配置文件
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# 修改memory_over.yml
/prometheus/prometheus/rules/memory_over.yml
 
groups:
- name: example
  rules:
  - alert: NodeMemoryUsage
    expr: (1 - (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes))) * 100 > 80
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "{{ $labels.instance }}: High Memory usage detected"
      description: "{{ $labels.instance }}: Memory usage is above 80% (current value is: {{ $value }})"
 
 
# 修改node_down.yml
/prometheus/prometheus/rules/node_down.yml
 
groups:
- name: node-up
  rules:
  - alert: node-up
    expr: up{job="centos-3"} == 0
    for: 15s
    labels:
      severity: 1
      team: node
    annotations:
      summary: "{{ $labels.instance }} 已停止运行!"
      description: "{{ $labels.instance }} 检测到异常停止!请重点关注!!!"

1.3.运行Docker

1
2
3
4
5
6
7
# 运行docker 

docker run -d --network host --name prometheus --restart=always \
-v /prometheus/prometheus:/etc/prometheus \
-v /prometheus/prometheus/data:/prometheus \
-e TZ=Asia/Shanghai \
prom/prometheus
1
2
3
# 访问地址

http://47.95.238.119:9090

2.部署Alertmanager

2.1.修改配置文件

  • 准备工作
1
2
3
4
5
6
7
8
# 创建目录
mkdir -p /prometheus/alertmanager/template
mkdir -p /prometheus/alertmanager/data
chmod 777 -R /prometheus/alertmanager/data
 
# 创建配置文件
/prometheus/alertmanager/alertmanager.yml
touch alertmanager.yml
  • 修改配置文件
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# 修改配置文件
/prometheus/alertmanager/alertmanager.yml
 
# alertmanager.yml:
# 一条报警记录同时发给应用、邮箱、钉钉
 
global:  # 全局配置项
  resolve_timeout: 5m #超时,默认5min
  smtp_smarthost: 'smtp.163.com:465'
  smtp_from: 'hollysys_test@163.com'
  smtp_auth_username: 'hollysys_test@163.com'
  smtp_auth_password: 'NBJAGQUIDIJNAQDF'  # 授权码:NBJAGQUIDIJNAQDF
  smtp_require_tls: false
 
 
templates:  # 定义模板信息
  - 'template/*.tmpl'   # 路径
 
 
route:  # 路由
  group_by: ['alertname'] # 报警分组依据
  group_wait: 10s #组等待时间
  group_interval: 10s # 发送前等待时间
  repeat_interval: 1h #重复周期
  receiver: default # 默认警报接收者
 
 
receivers:  # 警报接收者
- name: default #警报名称
  email_configs:
  - to: ''  # 接收警报的email(必填,请替换为实际收件地址)
    html: '' # 模板(原文的模板表达式疑似在页面渲染时丢失,请按 template/*.tmpl 中定义的模板名称填写)
    headers: { Subject: "[WARN] 报警邮件test" }
    send_resolved: true
 
  webhook_configs:
  - url: http://172.17.88.22:8888/monitor
  - url: http://172.17.88.22:8060/dingtalk/webhook1/send
    send_resolved: true
 
 
inhibit_rules:  # 告警抑制
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

2.2.Prometheus监控配置

1
2
3
4
5
6
7
8
# prometheus配置
 
  - job_name: alertmanager
    scrape_interval: 5s
    static_configs:
      - targets: ['172.17.88.22:9093']
        labels:
          instance: alert
1
2
3
4
# 访问地址
 
http://47.95.238.119:9093/metrics
curl '172.17.88.22:9093/metrics'

2.3.运行Docker

1
2
3
4
5
6
7
# 运行docker


docker run -d --network host --name alertmanager --restart=always \
-v /prometheus/alertmanager:/etc/alertmanager \
-v /prometheus/alertmanager/data:/alertmanager \
prom/alertmanager
1
2
3
# 访问地址

http://123.56.18.37:9093

3.部署Grafana

3.1.运行Docker

1
2
3
4
5
6
7
8
# 创建目录
mkdir -p /prometheus/grafana/data
chmod 777 -R /prometheus/grafana/data

# 运行docker 
docker run -d --network host --name=grafana --restart=always \
-v /prometheus/grafana/data:/var/lib/grafana \
grafana/grafana
1
2
3
4
5
6
# 访问地址
http://47.95.238.119:3000

# 默认账号密码
用户名:admin
密码:  admin

3.2. 配置数据源

3.3. 配置dashboard

  • 官网模板
1
2
3
4
# 访问官网地址

https://grafana.com/
https://grafana.com/grafana/dashboards

1
2
3
# 根据模板ID查找模板
ID: 8919
https://grafana.com/grafana/dashboards/8919
  • 导入官网模板
1
2
3
# 访问Grafana地址

http://123.56.18.37:3000/login