Skip to content
Snippets Groups Projects
Commit 0e2f133d authored by Thomas Schneider's avatar Thomas Schneider
Browse files

Initial import

parents
No related branches found
No related tags found
No related merge requests found
Showing with 382 additions and 0 deletions
# ansible-lint configuration: machine-parseable, quiet output on top of the
# default rule set, with a handful of rules skipped.
use_default_rules: true
parseable: true
quiet: true
skip_list:
  # 204: line length is checked by yamllint instead.
  - "204"
  # 401: "git checkout must contain explicit version".
  - "401"
  # 7xx: these rules are about Ansible Galaxy guidelines.
  - "701"
  - "702"
  - "703"
---
# GitLab CI pipeline: a single "test" stage that lints the repository.
image: registry.git.stud.rwth-aachen.de/infra/ci-containers/asta-ansible:buster

variables:
  # Check out git submodules recursively.
  GIT_SUBMODULE_STRATEGY: recursive

stages: [test]

before_script:
  - export LANG=en_US.UTF-8
  # Drop the world-writable bit on the checkout directory.
  - chmod o-w .
  # Print tool versions for easier debugging of CI failures.
  - ansible --version
  - ansible-lint --version
  - yamllint --version

test:
  stage: test
  script:
    - yamllint .
    - ansible-lint ./*/
    # rg exits 0 on a match and "!" inverts that, so the job fails if the
    # literal string 'passwordstore' appears under any templates directory.
    # yamllint disable-line rule:line-length
    - "! rg --fixed-strings 'passwordstore' ./*/templates"
---
# yamllint configuration: start from the default preset and adjust the
# severity or options of individual rules.
extends: default

rules:
  # Reported as warnings only.
  comments-indentation:
    level: warning
  line-length:
    allow-non-breakable-inline-mappings: true
    level: warning

  # Reported as errors.
  document-start:
    level: error
  octal-values:
    level: error
    forbid-implicit-octal: true
  truthy:
    level: error

  # Option tweaks at the default level.
  empty-lines:
    max: 1
  empty-values:
    forbid-in-block-mappings: true
    forbid-in-flow-mappings: true

  # quoted-strings: enable
---
# Extra command-line arguments for prometheus-node-exporter. The value is
# substituted into ARGS="..." by the prometheus-node-exporter.j2 template.
node_exporter_args: ""
---
# Handlers for the node_exporter role.

# Notified by the "Configure node_exporter" task.
- name: Restart node_exporter
  systemd:
    state: restarted
    name: prometheus-node-exporter.service
---
# Tasks for the node_exporter role: install the exporter, write its
# defaults file, and register this host as a scrape target.

- name: Install node_exporter
  apt:
    state: present
    name: prometheus-node-exporter
  tags: [prometheus, prometheus-exporter]

# Renders ARGS from node_exporter_args; a change restarts the service.
- name: Configure node_exporter
  template:
    dest: /etc/default/prometheus-node-exporter
    src: prometheus-node-exporter.j2
  notify: Restart node_exporter
  tags: [prometheus, prometheus-exporter, config]

# Runs on prometheus_host (delegate_to) and writes one target file per
# exporter host, named after that host's FQDN.
- name: Configure Prometheus server to scrape us
  delegate_to: "{{ prometheus_host }}"
  template:
    dest: "/etc/prometheus/scrape/node_{{ ansible_fqdn }}.yml"
    src: scrape.yml.j2
  tags: [prometheus, prometheus-exporter, config]
# Set the command-line arguments to pass to the server.
# Due to shell escaping, to pass backslashes for regexes, you need to double
# them (\\d for \d). If running under systemd, you need to double them again
# (\\\\d to mean \d), and escape newlines too.
ARGS="{{ node_exporter_args }}"
# Prometheus-node-exporter supports the following options:
#
# --collector.diskstats.ignored-devices="^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$"
# Regexp of devices to ignore for diskstats.
# --collector.filesystem.ignored-mount-points="^/(dev|proc|run|sys|mnt|media|var/lib/docker)($|/)"
# Regexp of mount points to ignore for filesystem
# collector.
# --collector.filesystem.ignored-fs-types="^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$"
# Regexp of filesystem types to ignore for
# filesystem collector.
# --collector.netdev.ignored-devices="^lo$"
# Regexp of net devices to ignore for netdev
# collector.
# --collector.netstat.fields="^(.*_(InErrors|InErrs)|Ip_Forwarding|Ip(6|Ext)_(InOctets|OutOctets)|Icmp6?_(InMsgs|OutMsgs)|TcpExt_(Listen.*|Syncookies.*)|Tcp_(ActiveOpens|PassiveOpens|RetransSegs|CurrEstab)|Udp6?_(InDatagrams|OutDatagrams|NoPorts))$"
# Regexp of fields to return for netstat
# collector.
# --collector.ntp.server="127.0.0.1"
# NTP server to use for ntp collector
# --collector.ntp.protocol-version=4
# NTP protocol version
# --collector.ntp.server-is-local
# Certify that collector.ntp.server address is the
# same local host as this collector.
# --collector.ntp.ip-ttl=1 IP TTL to use while sending NTP query
# --collector.ntp.max-distance=3.46608s
# Max accumulated distance to the root
# --collector.ntp.local-offset-tolerance=1ms
# Offset between local clock and local ntpd time
# to tolerate
# --path.procfs="/proc" procfs mountpoint.
# --path.sysfs="/sys" sysfs mountpoint.
# --collector.qdisc.fixtures=""
# test fixtures to use for qdisc collector
# end-to-end testing
# --collector.runit.servicedir="/etc/service"
# Path to runit service directory.
# --collector.supervisord.url="http://localhost:9001/RPC2"
# XML RPC endpoint.
# --collector.systemd.unit-whitelist=".+"
# Regexp of systemd units to whitelist. Units must
# both match whitelist and not match blacklist to
# be included.
# --collector.systemd.unit-blacklist=".+(\\.device|\\.scope|\\.slice|\\.target)"
# Regexp of systemd units to blacklist. Units must
# both match whitelist and not match blacklist to
# be included.
# --collector.systemd.private
# Establish a private, direct connection to
# systemd without dbus.
# --collector.textfile.directory="/var/lib/prometheus/node-exporter"
# Directory to read text files with metrics from.
# --collector.vmstat.fields="^(oom_kill|pgpg|pswp|pg.*fault).*"
# Regexp of fields to return for vmstat collector.
# --collector.wifi.fixtures=""
# test fixtures to use for wifi collector metrics
# --collector.arp Enable the arp collector (default: enabled).
# --collector.bcache Enable the bcache collector (default: enabled).
# --collector.bonding Enable the bonding collector (default: enabled).
# --collector.buddyinfo Enable the buddyinfo collector (default:
# disabled).
# --collector.conntrack Enable the conntrack collector (default:
# enabled).
# --collector.cpu Enable the cpu collector (default: enabled).
# --collector.diskstats Enable the diskstats collector (default:
# enabled).
# --collector.drbd Enable the drbd collector (default: disabled).
# --collector.edac Enable the edac collector (default: enabled).
# --collector.entropy Enable the entropy collector (default: enabled).
# --collector.filefd Enable the filefd collector (default: enabled).
# --collector.filesystem Enable the filesystem collector (default:
# enabled).
# --collector.hwmon Enable the hwmon collector (default: enabled).
# --collector.infiniband Enable the infiniband collector (default:
# enabled).
# --collector.interrupts Enable the interrupts collector (default:
# disabled).
# --collector.ipvs Enable the ipvs collector (default: enabled).
# --collector.ksmd Enable the ksmd collector (default: disabled).
# --collector.loadavg Enable the loadavg collector (default: enabled).
# --collector.logind Enable the logind collector (default: disabled).
# --collector.mdadm Enable the mdadm collector (default: enabled).
# --collector.meminfo Enable the meminfo collector (default: enabled).
# --collector.meminfo_numa Enable the meminfo_numa collector (default:
# disabled).
# --collector.mountstats Enable the mountstats collector (default:
# disabled).
# --collector.netdev Enable the netdev collector (default: enabled).
# --collector.netstat Enable the netstat collector (default: enabled).
# --collector.nfs Enable the nfs collector (default: enabled).
# --collector.nfsd Enable the nfsd collector (default: enabled).
# --collector.ntp Enable the ntp collector (default: disabled).
# --collector.qdisc Enable the qdisc collector (default: disabled).
# --collector.runit Enable the runit collector (default: disabled).
# --collector.sockstat Enable the sockstat collector (default:
# enabled).
# --collector.stat Enable the stat collector (default: enabled).
# --collector.supervisord Enable the supervisord collector (default:
# disabled).
# --collector.systemd Enable the systemd collector (default: enabled).
# --collector.tcpstat Enable the tcpstat collector (default:
# disabled).
# --collector.textfile Enable the textfile collector (default:
# enabled).
# --collector.time Enable the time collector (default: enabled).
# --collector.uname Enable the uname collector (default: enabled).
# --collector.vmstat Enable the vmstat collector (default: enabled).
# --collector.wifi Enable the wifi collector (default: enabled).
# --collector.xfs Enable the xfs collector (default: enabled).
# --collector.zfs Enable the zfs collector (default: enabled).
# --collector.timex Enable the timex collector (default: enabled).
# --web.listen-address=":9100"
# Address on which to expose metrics and web
# interface.
# --web.telemetry-path="/metrics"
# Path under which to expose metrics.
# --log.level="info" Only log messages with the given severity or
# above. Valid levels: [debug, info, warn, error,
# fatal]
# --log.format="logger:stderr"
# Set the log target and format. Example:
# "logger:syslog?appname=bob&local=7" or
# "logger:stdout?json=true"
# Target group for this host's node exporter on port 9100.
- targets: ['{{ ansible_fqdn }}:9100']
---
# Extra command-line arguments for the Prometheus server. The value is
# substituted into ARGS="..." by the default-prometheus.j2 template.
prometheus_args: ""

# Rule definitions, dumped with to_nice_yaml into
# /etc/prometheus/rules/ansible_alert.yml and ansible_recording.yml.
# The defaults are explicit empty mappings: a bare "alert:" parses as null
# and is rejected by yamllint's empty-values rule (forbid-in-block-mappings).
prometheus:
  rules:
    alert: {}
    recording: {}
---
# Handlers for the prometheus role.

# Full restart; notified when the command-line arguments file changes.
- name: Restart prometheus
  systemd:
    state: restarted
    name: prometheus.service

# Reload only; notified when the main config or the rule files change.
- name: Reload prometheus
  systemd:
    state: reloaded
    name: prometheus.service
---
# Tasks for the prometheus role: install the server, write its defaults
# file and main configuration, create the config directories, and render
# the rule files.

- name: Install prometheus
  apt:
    state: present
    name:
      - prometheus
  tags: [prometheus]

# Renders ARGS from prometheus_args; a change restarts the service.
- name: Configure prometheus command arguments
  template:
    dest: /etc/default/prometheus
    src: default-prometheus.j2
  notify: Restart prometheus
  tags: [prometheus, config]

# The rendered file is checked with promtool before it is put in place.
- name: Configure prometheus
  template:
    dest: /etc/prometheus/prometheus.yml
    src: prometheus.yml.j2
    validate: "promtool check config %s"
  notify: Reload prometheus
  tags: [prometheus, config]

- name: Create necessary directories
  file:
    state: directory
    path: "/etc/prometheus/{{ item }}"
  with_items: [alertmanagers, rules, scrape]
  tags: [prometheus, config]

# One rule file per kind (alert, recording), each checked with promtool.
- name: Configure rules
  template:
    dest: "/etc/prometheus/rules/ansible_{{ item }}.yml"
    src: "{{ item }}_rules.yml.j2"
    validate: "promtool check rules %s"
  with_items: [alert, recording]
  notify: Reload prometheus
  tags: [prometheus, config]
{{ prometheus.rules.alert | to_nice_yaml }}
# Set the command-line arguments to pass to the server.
# The value below is filled in from the Ansible variable "prometheus_args".
ARGS="{{ prometheus_args }}"
# Prometheus supports the following options:
# --config.file="/etc/prometheus/prometheus.yml"
# Prometheus configuration file path.
# --web.listen-address="0.0.0.0:9090"
# Address to listen on for UI, API, and telemetry.
# --web.read-timeout=5m Maximum duration before timing out read of the
# request, and closing idle connections.
# --web.max-connections=512 Maximum number of simultaneous connections.
# --web.external-url=<URL> The URL under which Prometheus is externally
# reachable (for example, if Prometheus is served
# via a reverse proxy). Used for generating
# relative and absolute links back to Prometheus
# itself. If the URL has a path portion, it will
# be used to prefix all HTTP endpoints served by
# Prometheus. If omitted, relevant URL components
# will be derived automatically.
# --web.route-prefix=<path> Prefix for the internal routes of web endpoints.
# Defaults to path of --web.external-url.
# --web.local-assets="/usr/share/prometheus/web/"
# Path to static asset/templates directory.
# --web.user-assets=<path> Path to static asset directory, available at
# /user.
# --web.enable-lifecycle Enable shutdown and reload via HTTP request.
# --web.enable-admin-api Enables API endpoints for admin control actions.
# --web.console.templates="/etc/prometheus/consoles"
# Path to the console template directory,
# available at /consoles.
# --web.console.libraries="/etc/prometheus/console_libraries"
# Path to the console library directory.
# --storage.tsdb.path="/var/lib/prometheus/metrics2/"
# Base path for metrics storage.
# --storage.tsdb.min-block-duration=2h
# Minimum duration of a data block before being
# persisted.
# --storage.tsdb.max-block-duration=<duration>
# Maximum duration compacted blocks may span.
# (Defaults to 10% of the retention period)
# --storage.tsdb.retention=15d
# How long to retain samples in the storage.
# --storage.tsdb.use-lockfile
# Create a lockfile in data directory.
# --alertmanager.notification-queue-capacity=10000
# The capacity of the queue for pending alert
# manager notifications.
# --alertmanager.timeout=10s
# Timeout for sending alerts to Alertmanager.
# --query.lookback-delta=5m The delta difference allowed for retrieving
# metrics during expression evaluations.
# --query.timeout=2m Maximum time a query may take before being
# aborted.
# --query.max-concurrency=20
# Maximum number of queries executed concurrently.
# --log.level=info Only log messages with the given severity or
# above. One of: [debug, info, warn, error]
global:
  # Labels added to every time series or alert when talking to external
  # systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: "{{ ansible_fqdn }}"

# TODO
# alerting:

rule_files:
  - /etc/prometheus/rules/*.yml

scrape_configs:
  # Prometheus scraping itself.
  - job_name: prometheus
    # Override the global default: scrape this job's targets every 5 seconds.
    scrape_interval: 5s
    scrape_timeout: 5s
    static_configs:
      - targets: ["localhost:9090"]

  # Targets discovered from files under /etc/prometheus/scrape/.
  - job_name: dynamic_scrapers
    file_sd_configs:
      - files:
          - /etc/prometheus/scrape/*.yml
          - /etc/prometheus/scrape/*.json
{{ prometheus.rules.recording | to_nice_yaml }}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment