springboot metrics config

SpringBoot配置Actuator metrics的监控

Actuator 是 Spring Boot 提供的对应用系统的自省和监控功能。通过 Actuator,可以使用数据化的指标去度量应用的运行情况,比如查看服务器的磁盘、内存、CPU等信息,系统的线程、gc、运行状态等等。Prometheus指标只是其中之一,也是本次流量监控用到的主体。
Actuator 通常通过使用 HTTP 和 JMX 来管理和监控应用,大多数情况使用 HTTP 的方式。

  • Maven引入Actuator和Prometheus
    <dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-actuator</artifactId>
    </dependency>
    <dependency>
    <groupId>io.micrometer</groupId>
    <artifactId>micrometer-registry-prometheus</artifactId>
    </dependency>
  • yml上配置信息
    management:
      endpoints:
        web:
          exposure:
            # 开启所有端点
            include: '*'
    如果使用 application.properties,则配置如下:
    spring.application.name=springboot2demo
    # 打开所有 Actuator 服务
    management.endpoints.web.exposure.include=*
    # 将应用名称添加到计量器的 tag 中去
    # 以便 Prometheus 根据应用名区分不同服务
    management.metrics.tags.application=${spring.application.name}
    在启动类中添加Bean,为所有指标统一加上 application 标签(作用与 management.metrics.tags.application 配置相同,便于 Prometheus 按应用名区分服务):
    import io.micrometer.core.instrument.MeterRegistry;
    import org.springframework.beans.factory.annotation.Value;
    import org.springframework.boot.SpringApplication;
    import org.springframework.boot.actuate.autoconfigure.metrics.MeterRegistryCustomizer;
    import org.springframework.boot.autoconfigure.SpringBootApplication;
    import org.springframework.context.annotation.Bean;

    /**
     * Demo application class. Besides the entry point, it declares a
     * {@link MeterRegistryCustomizer} bean that adds an {@code application}
     * common tag to the meter registry configuration.
     */
    @SpringBootApplication
    public class Springboot2demoApplication {

        /**
         * Builds a customizer that adds the {@code application} common tag to the
         * registry it is handed.
         *
         * @param applicationName value injected from the {@code spring.application.name} property
         * @return a customizer that calls {@code commonTags("application", applicationName)}
         *         on the registry's configuration
         */
        @Bean
        MeterRegistryCustomizer<MeterRegistry> configurer(
                @Value("${spring.application.name}") String applicationName) {
            return meterRegistry -> {
                meterRegistry.config().commonTags("application", applicationName);
            };
        }

        /**
         * Launches the application through {@link SpringApplication#run}.
         *
         * @param args command-line arguments, passed through unchanged
         */
        public static void main(String[] args) {
            SpringApplication.run(Springboot2demoApplication.class, args);
        }
    }
  • 然后就可以在项目中查看运行的指标了。
    Actuator指标:localhost:8080/actuator/metrics
    Prometheus指标:localhost:8080/actuator/prometheus

Actuator对应Prometheus的监控指标

以下为访问 /actuator/prometheus 端点得到的示例输出:

# HELP tomcat_sessions_rejected_sessions_total  
# TYPE tomcat_sessions_rejected_sessions_total counter
tomcat_sessions_rejected_sessions_total 0.0
# HELP tomcat_sessions_alive_max_seconds
# TYPE tomcat_sessions_alive_max_seconds gauge
tomcat_sessions_alive_max_seconds 0.0
# HELP jvm_buffer_count_buffers An estimate of the number of buffers in the pool
# TYPE jvm_buffer_count_buffers gauge
jvm_buffer_count_buffers{id="direct",} 97.0
jvm_buffer_count_buffers{id="mapped",} 0.0
# HELP process_cpu_usage The "recent cpu usage" for the Java Virtual Machine process
# TYPE process_cpu_usage gauge
process_cpu_usage 0.0
# HELP tomcat_servlet_error_total
# TYPE tomcat_servlet_error_total counter
tomcat_servlet_error_total{name="default",} 0.0
tomcat_servlet_error_total{name="jsp",} 0.0
tomcat_servlet_error_total{name="dispatcherServlet",} 0.0
tomcat_servlet_error_total{name="cn.taqu.core.web.filter.ApiDispatcherServlet",} 0.0
# HELP tomcat_global_request_seconds
# TYPE tomcat_global_request_seconds summary
tomcat_global_request_seconds_count{name="http-nio-8906",} 82.0
tomcat_global_request_seconds_sum{name="http-nio-8906",} 3.117
# HELP tomcat_cache_access_total
# TYPE tomcat_cache_access_total counter
tomcat_cache_access_total 0.0
# HELP process_uptime_seconds The uptime of the Java virtual machine
# TYPE process_uptime_seconds gauge
process_uptime_seconds 88.809
# HELP tomcat_global_request_max_seconds
# TYPE tomcat_global_request_max_seconds gauge
tomcat_global_request_max_seconds{name="http-nio-8906",} 1.246
# HELP tomcat_sessions_active_max_sessions
# TYPE tomcat_sessions_active_max_sessions gauge
tomcat_sessions_active_max_sessions 0.0
# HELP tomcat_sessions_created_sessions_total
# TYPE tomcat_sessions_created_sessions_total counter
tomcat_sessions_created_sessions_total 0.0
# HELP process_start_time_seconds Start time of the process since unix epoch.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.610695930801E9
# HELP jvm_threads_live_threads The current number of live threads including both daemon and non-daemon threads
# TYPE jvm_threads_live_threads gauge
jvm_threads_live_threads 343.0
# HELP logback_events_total Number of error level events that made it to the logs
# TYPE logback_events_total counter
logback_events_total{level="warn",} 0.0
logback_events_total{level="debug",} 0.0
logback_events_total{level="error",} 0.0
logback_events_total{level="trace",} 0.0
logback_events_total{level="info",} 19.0
# HELP tomcat_threads_config_max_threads
# TYPE tomcat_threads_config_max_threads gauge
tomcat_threads_config_max_threads{name="http-nio-8906",} NaN
# HELP jvm_threads_states_threads The current number of threads having NEW state
# TYPE jvm_threads_states_threads gauge
jvm_threads_states_threads{state="runnable",} 117.0
jvm_threads_states_threads{state="blocked",} 0.0
jvm_threads_states_threads{state="waiting",} 209.0
jvm_threads_states_threads{state="timed-waiting",} 17.0
jvm_threads_states_threads{state="new",} 0.0
jvm_threads_states_threads{state="terminated",} 0.0
# HELP system_cpu_count The number of processors available to the Java virtual machine
# TYPE system_cpu_count gauge
system_cpu_count 12.0
# HELP http_server_requests_seconds
# TYPE http_server_requests_seconds summary
http_server_requests_seconds_count{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/index",} 2.0
http_server_requests_seconds_sum{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/index",} 0.8098944
http_server_requests_seconds_count{exception="None",method="POST",outcome="SUCCESS",status="200",uri="root",} 1.0
http_server_requests_seconds_sum{exception="None",method="POST",outcome="SUCCESS",status="200",uri="root",} 0.0271729
http_server_requests_seconds_count{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/**",} 73.0
http_server_requests_seconds_sum{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/**",} 0.4981964
http_server_requests_seconds_count{exception="None",method="POST",outcome="SUCCESS",status="200",uri="/common/getLanguageList",} 2.0
http_server_requests_seconds_sum{exception="None",method="POST",outcome="SUCCESS",status="200",uri="/common/getLanguageList",} 0.0204194
http_server_requests_seconds_count{exception="None",method="POST",outcome="SUCCESS",status="200",uri="/inspectRule/dataGrid",} 1.0
http_server_requests_seconds_sum{exception="None",method="POST",outcome="SUCCESS",status="200",uri="/inspectRule/dataGrid",} 1.2461368
http_server_requests_seconds_count{exception="None",method="GET",outcome="REDIRECTION",status="302",uri="REDIRECTION",} 1.0
http_server_requests_seconds_sum{exception="None",method="GET",outcome="REDIRECTION",status="302",uri="REDIRECTION",} 0.1364228
http_server_requests_seconds_count{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/inspectRule/manager",} 1.0
http_server_requests_seconds_sum{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/inspectRule/manager",} 0.1832796
http_server_requests_seconds_count{exception="None",method="POST",outcome="SUCCESS",status="200",uri="/user/addServiceSystemLog",} 1.0
http_server_requests_seconds_sum{exception="None",method="POST",outcome="SUCCESS",status="200",uri="/user/addServiceSystemLog",} 0.0549094
# HELP http_server_requests_seconds_max
# TYPE http_server_requests_seconds_max gauge
http_server_requests_seconds_max{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/index",} 0.6628419
http_server_requests_seconds_max{exception="None",method="POST",outcome="SUCCESS",status="200",uri="root",} 0.0271729
http_server_requests_seconds_max{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/**",} 0.0253742
http_server_requests_seconds_max{exception="None",method="POST",outcome="SUCCESS",status="200",uri="/common/getLanguageList",} 0.0133106
http_server_requests_seconds_max{exception="None",method="POST",outcome="SUCCESS",status="200",uri="/inspectRule/dataGrid",} 1.2461368
http_server_requests_seconds_max{exception="None",method="GET",outcome="REDIRECTION",status="302",uri="REDIRECTION",} 0.1364228
http_server_requests_seconds_max{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/inspectRule/manager",} 0.1832796
http_server_requests_seconds_max{exception="None",method="POST",outcome="SUCCESS",status="200",uri="/user/addServiceSystemLog",} 0.0549094
# HELP jvm_gc_memory_allocated_bytes_total Incremented for an increase in the size of the young generation memory pool after one GC to before the next
# TYPE jvm_gc_memory_allocated_bytes_total counter
jvm_gc_memory_allocated_bytes_total 2.345259752E9
# HELP jvm_buffer_total_capacity_bytes An estimate of the total capacity of the buffers in this pool
# TYPE jvm_buffer_total_capacity_bytes gauge
jvm_buffer_total_capacity_bytes{id="direct",} 712848.0
jvm_buffer_total_capacity_bytes{id="mapped",} 0.0
# HELP jvm_gc_memory_promoted_bytes_total Count of positive increases in the size of the old generation memory pool before GC to after GC
# TYPE jvm_gc_memory_promoted_bytes_total counter
jvm_gc_memory_promoted_bytes_total 4.7747424E7
# HELP jvm_buffer_memory_used_bytes An estimate of the memory that the Java virtual machine is using for this buffer pool
# TYPE jvm_buffer_memory_used_bytes gauge
jvm_buffer_memory_used_bytes{id="direct",} 712849.0
jvm_buffer_memory_used_bytes{id="mapped",} 0.0
# HELP jvm_gc_pause_seconds Time spent in GC pause
# TYPE jvm_gc_pause_seconds summary
jvm_gc_pause_seconds_count{action="end of major GC",cause="Metadata GC Threshold",} 1.0
jvm_gc_pause_seconds_sum{action="end of major GC",cause="Metadata GC Threshold",} 0.136
jvm_gc_pause_seconds_count{action="end of minor GC",cause="Metadata GC Threshold",} 1.0
jvm_gc_pause_seconds_sum{action="end of minor GC",cause="Metadata GC Threshold",} 0.013
jvm_gc_pause_seconds_count{action="end of minor GC",cause="Allocation Failure",} 6.0
jvm_gc_pause_seconds_sum{action="end of minor GC",cause="Allocation Failure",} 0.069
# HELP jvm_gc_pause_seconds_max Time spent in GC pause
# TYPE jvm_gc_pause_seconds_max gauge
jvm_gc_pause_seconds_max{action="end of major GC",cause="Metadata GC Threshold",} 0.136
jvm_gc_pause_seconds_max{action="end of minor GC",cause="Metadata GC Threshold",} 0.013
jvm_gc_pause_seconds_max{action="end of minor GC",cause="Allocation Failure",} 0.023
# HELP tomcat_threads_current_threads
# TYPE tomcat_threads_current_threads gauge
tomcat_threads_current_threads{name="http-nio-8906",} NaN
# HELP tomcat_global_error_total
# TYPE tomcat_global_error_total counter
tomcat_global_error_total{name="http-nio-8906",} 0.0
# HELP jvm_threads_daemon_threads The current number of live daemon threads
# TYPE jvm_threads_daemon_threads gauge
jvm_threads_daemon_threads 237.0
# HELP tomcat_cache_hit_total
# TYPE tomcat_cache_hit_total counter
tomcat_cache_hit_total 0.0
# HELP tomcat_threads_busy_threads
# TYPE tomcat_threads_busy_threads gauge
tomcat_threads_busy_threads{name="http-nio-8906",} NaN
# HELP tomcat_servlet_request_max_seconds
# TYPE tomcat_servlet_request_max_seconds gauge
tomcat_servlet_request_max_seconds{name="default",} 0.0
tomcat_servlet_request_max_seconds{name="jsp",} 0.0
tomcat_servlet_request_max_seconds{name="dispatcherServlet",} 1.246
tomcat_servlet_request_max_seconds{name="cn.taqu.core.web.filter.ApiDispatcherServlet",} 0.0
# HELP jvm_threads_peak_threads The peak live thread count since the Java virtual machine started or peak was reset
# TYPE jvm_threads_peak_threads gauge
jvm_threads_peak_threads 345.0
# HELP jvm_classes_unloaded_classes_total The total number of classes unloaded since the Java virtual machine has started execution
# TYPE jvm_classes_unloaded_classes_total counter
jvm_classes_unloaded_classes_total 3.0
# HELP jvm_memory_committed_bytes The amount of memory in bytes that is committed for the Java virtual machine to use
# TYPE jvm_memory_committed_bytes gauge
jvm_memory_committed_bytes{area="heap",id="PS Survivor Space",} 3.2505856E7
jvm_memory_committed_bytes{area="heap",id="PS Old Gen",} 2.36453888E8
jvm_memory_committed_bytes{area="heap",id="PS Eden Space",} 4.97549312E8
jvm_memory_committed_bytes{area="nonheap",id="Metaspace",} 9.76896E7
jvm_memory_committed_bytes{area="nonheap",id="Code Cache",} 2.0709376E7
jvm_memory_committed_bytes{area="nonheap",id="Compressed Class Space",} 1.1976704E7
# HELP tomcat_sessions_active_current_sessions
# TYPE tomcat_sessions_active_current_sessions gauge
tomcat_sessions_active_current_sessions 0.0
# HELP tomcat_sessions_expired_sessions_total
# TYPE tomcat_sessions_expired_sessions_total counter
tomcat_sessions_expired_sessions_total 0.0
# HELP tomcat_global_sent_bytes_total
# TYPE tomcat_global_sent_bytes_total counter
tomcat_global_sent_bytes_total{name="http-nio-8906",} 2920966.0
# HELP jvm_gc_live_data_size_bytes Size of old generation memory pool after a full GC
# TYPE jvm_gc_live_data_size_bytes gauge
jvm_gc_live_data_size_bytes 4.3797632E7
# HELP jvm_memory_max_bytes The maximum amount of memory in bytes that can be used for memory management
# TYPE jvm_memory_max_bytes gauge
jvm_memory_max_bytes{area="heap",id="PS Survivor Space",} 3.2505856E7
jvm_memory_max_bytes{area="heap",id="PS Old Gen",} 2.841116672E9
jvm_memory_max_bytes{area="heap",id="PS Eden Space",} 1.344274432E9
jvm_memory_max_bytes{area="nonheap",id="Metaspace",} -1.0
jvm_memory_max_bytes{area="nonheap",id="Code Cache",} 2.5165824E8
jvm_memory_max_bytes{area="nonheap",id="Compressed Class Space",} 1.073741824E9
# HELP tomcat_global_received_bytes_total
# TYPE tomcat_global_received_bytes_total counter
tomcat_global_received_bytes_total{name="http-nio-8906",} 118.0
# HELP jvm_gc_max_data_size_bytes Max size of old generation memory pool
# TYPE jvm_gc_max_data_size_bytes gauge
jvm_gc_max_data_size_bytes 2.841116672E9
# HELP jvm_memory_used_bytes The amount of used memory
# TYPE jvm_memory_used_bytes gauge
jvm_memory_used_bytes{area="heap",id="PS Survivor Space",} 3.2479312E7
jvm_memory_used_bytes{area="heap",id="PS Old Gen",} 6.7252472E7
jvm_memory_used_bytes{area="heap",id="PS Eden Space",} 4.5849296E7
jvm_memory_used_bytes{area="nonheap",id="Metaspace",} 9.2626808E7
jvm_memory_used_bytes{area="nonheap",id="Code Cache",} 2.063232E7
jvm_memory_used_bytes{area="nonheap",id="Compressed Class Space",} 1.1170784E7
# HELP system_cpu_usage The "recent cpu usage" for the whole system
# TYPE system_cpu_usage gauge
system_cpu_usage 0.3472113977877134
# HELP tomcat_servlet_request_seconds
# TYPE tomcat_servlet_request_seconds summary
tomcat_servlet_request_seconds_count{name="default",} 0.0
tomcat_servlet_request_seconds_sum{name="default",} 0.0
tomcat_servlet_request_seconds_count{name="jsp",} 0.0
tomcat_servlet_request_seconds_sum{name="jsp",} 0.0
tomcat_servlet_request_seconds_count{name="dispatcherServlet",} 83.0
tomcat_servlet_request_seconds_sum{name="dispatcherServlet",} 3.0
tomcat_servlet_request_seconds_count{name="cn.taqu.core.web.filter.ApiDispatcherServlet",} 0.0
tomcat_servlet_request_seconds_sum{name="cn.taqu.core.web.filter.ApiDispatcherServlet",} 0.0
# HELP jvm_classes_loaded_classes The number of classes that are currently loaded in the Java virtual machine
# TYPE jvm_classes_loaded_classes gauge
jvm_classes_loaded_classes 15999.0

Actuator 端点说明

  • auditevents:获取当前应用暴露的审计事件信息
  • beans:获取应用中所有的 Spring Beans 的完整关系列表
  • caches:获取公开可以用的缓存
  • conditions:获取自动配置条件信息,记录哪些自动配置条件通过和没通过的原因
  • configprops:获取所有配置属性,包括默认配置,显示所有 @ConfigurationProperties 的整理列表
  • env:获取所有环境变量
  • flyway:获取已应用的所有Flyway数据库迁移信息,需要一个或多个 Flyway Bean
  • liquibase:获取已应用的所有Liquibase数据库迁移。需要一个或多个 Liquibase Bean
  • health:获取应用程序健康指标(运行状况信息)
  • httptrace:获取HTTP跟踪信息(默认情况下,最近100个HTTP请求-响应交换)。需要 HttpTraceRepository Bean
  • info:获取应用程序信息
  • integrationgraph:显示 Spring Integration 图。需要依赖 spring-integration-core
  • loggers:显示和修改应用程序中日志的配置
  • logfile:返回日志文件的内容(如果已设置logging.file.name或logging.file.path属性)
  • metrics:获取系统度量指标信息
  • mappings:显示所有@RequestMapping路径的整理列表
  • scheduledtasks:显示应用程序中的计划任务
  • sessions:允许从Spring Session支持的会话存储中检索和删除用户会话。需要使用Spring Session的基于Servlet的Web应用程序
  • shutdown:关闭应用,要求 management.endpoint.shutdown.enabled 设置为 true,默认为 false
  • threaddump:获取系统线程转储信息
  • heapdump:返回hprof堆转储文件
  • jolokia:通过HTTP公开JMX bean(当Jolokia在类路径上时,不适用于WebFlux)。需要依赖 jolokia-core
  • prometheus:以Prometheus服务器可以抓取的格式公开指标。需要依赖 micrometer-registry-prometheus

Prometheus/Metrics 端点说明

序号 参数 参数说明 是否监控 监控手段 重要度
JVM
1 jvm.memory.max JVM最大内存
2 jvm.memory.committed JVM可用内存 展示并监控堆内存和Metaspace 重要
3 jvm.memory.used JVM已用内存 展示并监控堆内存和Metaspace 重要
4 jvm.buffer.memory.used JVM缓冲区已用内存
5 jvm.buffer.count 当前缓冲区数
6 jvm.threads.daemon JVM守护线程数 显示在监控页面
7 jvm.threads.live JVM当前活跃线程数 显示在监控页面;监控达到阈值时报警 重要
8 jvm.threads.peak JVM峰值线程数 显示在监控页面
9 jvm.classes.loaded 加载classes数
10 jvm.classes.unloaded JVM启动以来已卸载的classes数
11 jvm.gc.memory.allocated GC时,年轻代分配的内存空间
12 jvm.gc.memory.promoted GC时,老年代分配的内存空间
13 jvm.gc.max.data.size GC时,老年代的最大内存空间
14 jvm.gc.live.data.size FullGC时,老年代的内存空间
15 jvm.gc.pause GC耗时 显示在监控页面
TOMCAT
16 tomcat.sessions.created tomcat已创建session数
17 tomcat.sessions.expired tomcat已过期session数
18 tomcat.sessions.active.current tomcat活跃session数
19 tomcat.sessions.active.max tomcat最多活跃session数 显示在监控页面,超过阈值可报警或者进行动态扩容 重要
20 tomcat.sessions.alive.max tomcat session最长存活时间(秒)
21 tomcat.sessions.rejected 超过session最大配置后,拒绝的session个数 显示在监控页面,方便分析问题
22 tomcat.global.error 错误总数 显示在监控页面,方便分析问题
23 tomcat.global.sent 发送的字节数
24 tomcat.global.request.max request最长时间
25 tomcat.global.request 全局request次数和时间
26 tomcat.global.received 接收的字节数
27 tomcat.servlet.request servlet的请求次数和时间
28 tomcat.servlet.error servlet发生错误总数
29 tomcat.servlet.request.max servlet请求最长时间
30 tomcat.threads.busy tomcat繁忙线程 显示在监控页面,据此检查是否有线程夯住
31 tomcat.threads.current tomcat当前线程数(包括守护线程) 显示在监控页面 重要
32 tomcat.threads.config.max tomcat配置的线程最大数 显示在监控页面 重要
33 tomcat.cache.access tomcat读取缓存次数
34 tomcat.cache.hit tomcat缓存命中次数
CPU…
35 system.cpu.count CPU数量
36 system.load.average.1m load average 超过阈值报警 重要
37 system.cpu.usage 系统CPU使用率
38 process.cpu.usage 当前进程CPU使用率 超过阈值报警
39 http.server.requests http请求调用情况 显示10个请求量最大,耗时最长的URL;统计非200的请求量 重要
40 process.uptime 应用已运行时间 显示在监控页面
41 process.files.max 允许最大句柄数 配合当前打开句柄数使用
42 process.start.time 应用启动时间点 显示在监控页面
43 process.files.open 当前打开句柄数 监控文件句柄使用率,超过阈值后报警 重要

参考文章

评论