Add alertmanager role and config

54808597 · Thomas Schneider · 7465ced7 · 54808597 · 54808597 · 54808597
Commit 54808597 authored 5 years ago by Thomas Schneider
--- a/alertmanager/defaults/main.yml
+++ b/alertmanager/defaults/main.yml
+---
+# Extra command-line arguments for alertmanager; rendered into the ARGS
+# variable of /etc/default/prometheus-alertmanager.
+prometheus_alertmanager_args: ''
+# SMTP relay (host:port) and sender address; rendered into the "global"
+# section of alertmanager.yml.
+prometheus_smtp_smarthost: 'mail.example.org:25'
+prometheus_smtp_from: 'monitoring@example.org'
+# Root of the alert routing tree; rendered under "route:" in alertmanager.yml.
+prometheus_alert_route:
+  group_by: ['alertname', 'cluster', 'service']
+  group_wait: 30s
+  group_interval: 5m
+  repeat_interval: 3h
+  # Receiver used when no child route matches.
+  receiver: admins-mail
+  routes:
+    # "receiver" must be a sibling of "match".  Nested inside "match" it is
+    # read as a label matcher (receiver="admins-pager"), which leaves the
+    # route without a receiver of its own, so critical alerts never reach
+    # the pager.
+    - match:
+        severity: critical
+      receiver: admins-pager
+# Inhibition rules; rendered under "inhibit_rules:" in alertmanager.yml.
+prometheus_alert_inhibit_rules:
+  # Source matcher: severity=critical.  Target matcher: severity=warning.
+  # "equal" lists the labels that source and target alerts must share.
+  - source_match:
+      severity: critical
+    target_match:
+      severity: warning
+    equal:
+      - alertname
+      - cluster
+      - service
+# Notification receivers; rendered under "receivers:" in alertmanager.yml.
+# The names here are the ones referenced by the "receiver" keys in
+# prometheus_alert_route.
+prometheus_alert_receivers:
+  - name: admins-mail
+    email_configs:
+      - to: admin@example.org
+  - name: admins-pager
+    pagerduty_configs:
+      # Placeholder value; override it with the real PagerDuty key.  Quoted so
+      # that an all-digit key stays a string instead of parsing as an integer.
+      - service_key: '42'
--- a/alertmanager/handlers/main.yml
+++ b/alertmanager/handlers/main.yml
+---
+# Handlers for the prometheus-alertmanager systemd unit.  Tasks trigger them
+# by name through "notify".
+- name: Restart alertmanager
+  systemd:
+    state: restarted
+    name: 'prometheus-alertmanager.service'
+
+- name: Reload alertmanager
+  systemd:
+    state: reloaded
+    name: 'prometheus-alertmanager.service'
--- a/alertmanager/tasks/main.yml
+++ b/alertmanager/tasks/main.yml
+---
+# Make sure the alertmanager package is installed through apt.
+- name: Install alertmanager
+  apt:
+    state: present
+    name: 'prometheus-alertmanager'
+# Render the environment file that carries ARGS.  A change notifies the
+# restart handler so the service picks up the new command line.
+- name: Configure alertmanager command arguments
+  template:
+    src: default.j2
+    dest: /etc/default/prometheus-alertmanager
+    # Pin ownership and permissions instead of inheriting them from the umask.
+    owner: root
+    group: root
+    mode: '0644'
+  notify:
+    - Restart alertmanager
+  tags:
+    - config
+# Render the main alertmanager configuration.  A change notifies the reload
+# handler rather than the restart handler.
+- name: Configure alertmanager
+  template:
+    src: alertmanager.yml.j2
+    dest: /etc/prometheus/alertmanager.yml
+    # Pin ownership and permissions instead of inheriting them from the umask.
+    # NOTE(review): the rendered file can contain credentials (for example a
+    # pagerduty service_key); consider a tighter mode that only the service
+    # user can read - confirm the account the service runs as first.
+    owner: root
+    group: root
+    mode: '0644'
+  notify:
+    - Reload alertmanager
+  tags:
+    - config
--- a/alertmanager/templates/alertmanager.yml.j2
+++ b/alertmanager/templates/alertmanager.yml.j2
+{#-
+  Alertmanager configuration.  The route, inhibit_rules and receivers
+  sections are dumped from the prometheus_alert_* variables and indented by
+  two spaces so that they nest under their respective keys.
+-#}
+global:
+  smtp_smarthost: '{{ prometheus_smtp_smarthost }}'
+  smtp_from: '{{ prometheus_smtp_from }}'
+templates:
+  - '/etc/prometheus/alertmanager_templates/*.tmpl'
+route:
+{{ prometheus_alert_route | to_yaml | indent(2, true) }}
+inhibit_rules:
+{{ prometheus_alert_inhibit_rules | to_yaml | indent(2, true) }}
+receivers:
+{{ prometheus_alert_receivers | to_yaml | indent(2, true) }}
--- a/alertmanager/templates/default.j2
+++ b/alertmanager/templates/default.j2
+# Set the command-line arguments to pass to the server.
+# The value is filled in from the Ansible variable prometheus_alertmanager_args.
+ARGS="{{ prometheus_alertmanager_args }}"
+# The alert manager supports the following options:
+#  --config.file="/etc/prometheus/alertmanager.yml"
+#       Alertmanager configuration file name.
+#  --storage.path="/var/lib/prometheus/alertmanager/"
+#       Base path for data storage.
+#  --data.retention=120h
+#       How long to keep data for.
+#  --alerts.gc-interval=30m
+#       Interval between alert GC.
+#  --log.level=info
+#       Only log messages with the given severity or above.
+#  --web.external-url=WEB.EXTERNAL-URL
+#       The URL under which Alertmanager is externally reachable (for example,
+#       if Alertmanager is served via a reverse proxy). Used for generating
+#       relative and absolute links back to Alertmanager itself. If the URL has
+#       a path portion, it will be used to prefix all HTTP endpoints served by
+#       Alertmanager. If omitted, relevant URL components will be derived
+#       automatically.
+#  --web.route-prefix=WEB.ROUTE-PREFIX
+#       Prefix for the internal routes of web endpoints. Defaults to path of
+#       --web.external-url.
+#  --web.listen-address=":9093"
+#       Address to listen on for the web interface and API.
+#  --web.ui-path="/usr/share/prometheus/alertmanager/ui/"
+#       Path to static UI directory.
+#  --template.default="/usr/share/prometheus/alertmanager/default.tmpl"
+#       Path to default notification template.
+#  --cluster.listen-address="0.0.0.0:9094"
+#       Listen address for cluster.
+#  --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS
+#       Explicit address to advertise in cluster.
+#  --cluster.peer=CLUSTER.PEER ...
+#       Initial peers (may be repeated).
+#  --cluster.peer-timeout=15s
+#       Time to wait between peers to send notifications.
+#  --cluster.gossip-interval=200ms
+#       Interval between sending gossip messages. By lowering this value (more
+#       frequent) gossip messages are propagated across the cluster more
+#       quickly at the expense of increased bandwidth.
+#  --cluster.pushpull-interval=1m0s
+#       Interval for gossip state syncs. Setting this interval lower (more
+#       frequent) will increase convergence speeds across larger clusters at
+#       the expense of increased bandwidth usage.
+#  --cluster.tcp-timeout=10s
+#       Timeout for establishing a stream connection with a remote node for a
+#       full state sync, and for stream read and write operations.
+#  --cluster.probe-timeout=500ms
+#       Timeout to wait for an ack from a probed node before assuming it is
+#       unhealthy. This should be set to 99-percentile of RTT (round-trip time)
+#       on your network.
+#  --cluster.probe-interval=1s
+#       Interval between random node probes. Setting this lower (more frequent)
+#       will cause the cluster to detect failed nodes more quickly at the
+#       expense of increased bandwidth usage.
+#  --cluster.settle-timeout=1m0s
+#       Maximum time to wait for cluster connections to settle before
+#       evaluating notifications.
+#  --cluster.reconnect-interval=10s
+#       Interval between attempting to reconnect to lost peers.
+#  --cluster.reconnect-timeout=6h0m0s
+#       Length of time to attempt to reconnect to a lost peer.
--- a/prometheus/defaults/main.yml
+++ b/prometheus/defaults/main.yml
@@ -3,3 +3,8 @@ prometheus_args: ""
 # This variable intentionally left null.  They need to exist, so that the
 # template works, but they need to have a value (empty string does not work).
 prometheus_rules: null
+prometheus_alert_relabel_configs: []  # empty list: no relabel rules by default
+prometheus_alertmanagers:  # rendered under alerting.alertmanagers
+  - static_configs:
+      - targets:
+          - "localhost:9093"
--- a/prometheus/templates/prometheus.yml.j2
+++ b/prometheus/templates/prometheus.yml.j2
@@ -4,8 +4,11 @@ global:
  external_labels:
      monitor: '{{ ansible_fqdn }}'
-# TODO
+alerting:
-# alerting:
+  alert_relabel_configs:
+{{ prometheus_alert_relabel_configs | to_yaml | indent(4, true) }}
+  alertmanagers:
+{{ prometheus_alertmanagers | to_yaml | indent(4, true) }}
 rule_files:
  - /etc/prometheus/rules/*.yml