global:
# ResolveTimeout is the time after which an alert is declared resolved# if it has not been updated.
[ resolve_timeout: <duration> | default = 5m ]
# The default SMTP From header field.
[ smtp_from: <tmpl_string> ]
# The default SMTP smarthost used for sending emails.
[ smtp_smarthost: <string> ]
# The API URL to use for Slack notifications.
[ slack_api_url: <string> ]
[ pagerduty_url: <string> | default = "https://events.pagerduty.com/generic/2010-04-15/create_event.json" ]
[ opsgenie_api_host: <string> | default = "https://api.opsgenie.com/" ]
# Files from which custom notification template definitions are read.# The last component may use a wildcard matcher, e.g. 'templates/*.tmpl'.
templates:
[ - <filepath> ... ]
# The root node of the routing tree.
route: <route>
# A list of notification receivers.
receivers:
- <receiver> ...# A list of inhibition rules.
inhibit_rules:
[ - <inhibit_rule> ... ]
[ receiver: <string> ]
[ group_by: '[' <labelname>, ...']' ]
# Whether an alert should continue matching subsequent sibling nodes.
[ continue: <boolean> | default = false ]
# A set of equality matchers an alert has to fulfill to match the node.
match:
[ <labelname>: <labelvalue>, ... ]
# A set of regex-matchers an alert has to fulfill to match the node.
match_re:
[ <labelname>: <regex>, ... ]
# How long to initially wait to send a notification for a group# of alerts. Allows to wait for an inhibiting alert to arrive or collect# more initial alerts for the same group. (Usually ~0s to few minutes.)
[ group_wait: <duration> ]
# How long to wait before sending notification about new alerts that are# in are added to a group of alerts for which an initial notification# has already been sent. (Usually ~5min or more.)
[ group_interval: <duration> ]
# How long to wait before sending a notification again if it has already# been sent successfully for an alert. (Usually ~3h or more).
[ repeat_interval: <duration> ]
# Zero or more child routes.
routes:
[ - <route> ... ]
示例:
# The root route with all parameters, which are inherited by the child
# routes if they are not overwritten.
route:
receiver: 'default-receiver'
group_wait: 30s
group_interval: 5m
repeat_interval: 4h
group_by: [cluster, alertname]
# All alerts that donotmatch the following child routes
# will remain at the root node and be dispatched to'default-receiver'.
routes:
# All alerts with service=mysql or service=cassandra
# are dispatched to the database pager.
- receiver: 'database-pager'
group_wait: 10s
match_re:
service: mysql|cassandra
# All alerts with the team=frontend label match this sub-route.
# They are grouped by product and environment rather than cluster
# and alertname.
- receiver: 'frontend-pager'
group_by: [product, environment]
match:
team: frontend
# Matchers that have to be fulfilled in the alerts to be muted.
target_match:
[ <labelname>: <labelvalue>, ... ]
target_match_re:
[ <labelname>: <regex>, ... ]
# Matchers for which one or more alerts have to exist for the# inhibition to take effect.
source_match:
[ <labelname>: <labelvalue>, ... ]
source_match_re:
[ <labelname>: <regex>, ... ]
# Labels that must have an equal value in the source and target# alert for the inhibition to take effect.
[ equal: '[' <labelname>, ...']' ]
接收器 receiver
顾名思义,警报接收的配置。
通用配置格式
# The unique name of the receiver.
name: <string>
# Configurations for several notification integrations.
email_configs:
[ - <email_config>, ... ]
pagerduty_configs:
[ - <pagerduty_config>, ... ]
slack_config:
[ - <slack_config>, ... ]
opsgenie_configs:
[ - <opsgenie_config>, ... ]
webhook_configs:
[ - <webhook_config>, ... ]
邮件接收器 email_config
# Whether or not to notify about resolved alerts.
[ send_resolved: <boolean> | default = false ]
# The email address to send notifications to.
to: <tmpl_string>
# The sender address.
[ from: <tmpl_string> | default = global.smtp_from ]
# The SMTP host through which emails are sent.
[ smarthost: <string> | default = global.smtp_smarthost ]
# The HTML body of the email notification.
[ html: <tmpl_string> | default = '{{ template "email.default.html" . }}' ]
# Further headers email header key/value pairs. Overrides any headers# previously set by the notification implementation.
[ headers: { <string>: <tmpl_string>, ... } ]
Slack接收器 slack_config
# Whether or not to notify about resolved alerts.
[ send_resolved: <boolean> | default = true ]
# The Slack webhook URL.
[ api_url: <string> | default = global.slack_api_url ]
# The channel or user to send notifications to.
channel: <tmpl_string>
# API request data as defined by the Slack webhook API.
[ color: <tmpl_string> | default = '{{ ifeq.Status"firing" }}danger{{ else }}good{{ end }}' ]
[ username: <tmpl_string> | default = '{{ template"slack.default.username". }}'
[ title: <tmpl_string> | default = '{{ template"slack.default.title". }}' ]
[ title_link: <tmpl_string> | default = '{{ template"slack.default.titlelink". }}' ]
[ pretext: <tmpl_string> | default = '{{ template"slack.default.pretext". }}' ]
[ text: <tmpl_string> | default = '{{ template"slack.default.text". }}' ]
[ fallback: <tmpl_string> | default = '{{ template"slack.default.fallback". }}' ]
Webhook接收器 webhook_config
# Whether ornotto notify about resolved alerts.
[ send_resolved: <boolean> | default = true ]
# The endpoint to send HTTP POST requests to.
url: <string>
# Alert for any instance that is unreachable for >5 minutes.
ALERT InstanceDown
IF up == 0FOR5m
LABELS { severity = "page" }
ANNOTATIONS {
summary = "Instance {{ $labels.instance }} down",
description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.",
}
# Alert for any instance that have a median request latency >1s.
ALERT APIHighRequestLatency
IF api_http_request_latencies_second{quantile="0.5"} > 1FOR1m
ANNOTATIONS {
summary = "High request latency on {{ $labels.instance }}",
description = "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)",
}