diff --git a/lvmsnapshot.py b/lvmsnapshot.py
index 9927dbfee24b2d4f5ef6a194bd29ef864b80a311..345fb8e4b9993d149c6419ebee1260ee4e884147 100644
--- a/lvmsnapshot.py
+++ b/lvmsnapshot.py
@@ -21,17 +21,25 @@
 # sort by period
 # mark snapshots to keep, from oldest within period to new
 # delete unmarked snapshots
-# periods are cumulative
 
 import os
 import re
-from datetime import datetime
+from datetime import datetime, timedelta
 import subprocess as sp
 import json
 from contextlib import contextmanager
+from collections import OrderedDict
 
 SNAPSHOT_BASE_DIR = "/snapshots"
 TIMESTAMP_FORMAT = "%Y-%m-%d-%H-%M"
+# seconds per interval unit; "m" (31 days) is distinct from "M" (60 s)
+PERIOD_KEYS = OrderedDict([
+    ("y", 365 * 86400),
+    ("m", 31 * 86400),
+    ("d", 86400),
+    ("H", 3600),
+    ("M", 60),
+])
 
 @contextmanager
 def xfs_freeze(mountpoint):
@@ -52,6 +60,19 @@ class Volume:
         self.volume_group = volume_group
         self.name = name
 
+    def __repr__(self):
+        return self.get_full_name()
+
+    def __eq__(self, other):
+        # compare the names themselves; comparing hashes would report
+        # unrelated objects with colliding hashes as equal
+        if not isinstance(other, Volume):
+            return NotImplemented
+        return self.get_full_name() == other.get_full_name()
+
+    def __hash__(self):
+        return hash(self.get_full_name())
+
     def get_full_name(self):
         return "{}/{}".format(self.volume_group, self.name)
 
@@ -65,7 +86,7 @@ class Volume:
             "--source", mapper_device,
             "--json"
         ]
-        result = sp.check_output(command)
+        result = sp.check_output(command).decode("utf-8")
         data = json.loads(result)
         filesystems = data["filesystems"]
         if len(filesystems) < 1:
@@ -79,6 +100,9 @@ class Snapshot:
         self.timestamp = timestamp
         self.active = active
 
+    def __repr__(self):
+        return self.get_name()
+
     def get_timestamp_str(self):
         return self.timestamp.strftime(TIMESTAMP_FORMAT)
 
@@ -164,7 +188,7 @@ class Snapshot:
             "-o", "vg_name,lv_name,lv_active,origin,lv_role",
             "--reportformat", "json"
         ]
-        result = sp.check_output(list_command)
+        result = sp.check_output(list_command).decode("utf-8")
         data = json.loads(result)
         raw_volumes = data["report"][0]["lv"]
         parent_name_map = {}
@@ -182,17 +206,69 @@ class Snapshot:
                 volume = Volume(volume_group, volume_name)
                 snapshots[volume] = []
                 parent_name_map[volume.name] = volume
-            else:
-                print("Ignoring volume {}/{}".format(volume_group, volume_name)) # todo: remove this output
         for (volume_group, volume_name, active, origin, roles) in raw_snapshots:
-            parent_name, timestamp = Snapshot.parse_name(volume_name)
-            if parent_name != origin:
-                raise Exception("Parent volume name not matching: '{}' != '{}'".format(parent_name, origin))
-            parent = parent_name_map[parent_name]
-            snapshot = Snapshot(parent, timestamp, active)
-            snapshots[parent].append(snapshot)
+            try:
+                parent_name, timestamp = Snapshot.parse_name(volume_name)
+                if parent_name != origin:
+                    raise Exception("Parent volume name not matching: "
+                                    "'{}' != '{}'".format(parent_name, origin))
+                parent = parent_name_map[parent_name]
+                snapshot = Snapshot(parent, timestamp, active)
+                snapshots[parent].append(snapshot)
+            except ValueError:
+                # a snapshot, but not named like the autosnapshots
+                # we better keep the hands off them
+                pass
         return snapshots
 
+
+class Period:
+    """Retention rule: a target number of snapshots and an interval."""
+
+    def __init__(self, target_number, interval):
+        self.target_number = target_number
+        self.interval = interval
+
+    def __repr__(self):
+        return "Keep {} at interval {}d{}s (since {})".format(
+            self.target_number, self.interval.days,
+            self.interval.seconds, self.get_start())
+
+    def get_start(self):
+        """Return the start of the window: now - target_number * interval."""
+        return datetime.now() - self.target_number * self.interval
+
+    @staticmethod
+    def build_regex(period_keys=None):
+        """Build a regex with one optional "<digits><key>" group per key."""
+        if period_keys is None:
+            period_keys = PERIOD_KEYS
+        parts = [r"(?:(?P<{0}>\d+){0})?".format(key) for key in period_keys]
+        return "".join(parts)
+
+    @staticmethod
+    def parse_interval(text, period_keys=None):
+        """Parse an interval string such as "1d12H" into a timedelta.
+
+        Raises an Exception if text is empty or does not match the regex
+        from build_regex().
+        """
+        if period_keys is None:
+            period_keys = PERIOD_KEYS
+        regex = Period.build_regex(period_keys)
+        match = re.fullmatch(regex, text)
+        # every group of the regex is optional, so "" matches as well;
+        # treat it as invalid instead of silently returning a zero interval
+        if match is None or not text:
+            raise Exception("Invalid interval config: '{}', "
+                            "needs to match '{}'".format(text, regex))
+        seconds = 0
+        groups = match.groupdict()
+        for key in period_keys:
+            if groups[key] is not None:
+                seconds += period_keys[key] * int(groups[key])
+        return timedelta(seconds=seconds)
+
 def load_config():
     import sys
     import toml
@@ -205,10 +281,53 @@ def load_config():
     with open(config_path, "r") as config_file:
         return toml.load(config_file)
 
+def parse_config(config):
+    """Map each configured Volume to the list of its Period rules."""
+    periods = {}
+    for volume_conf in config["volume"]:
+        name = volume_conf["name"]
+        volume_group = volume_conf["volume_group"]
+        volume = Volume(volume_group, name)
+        periods[volume] = []
+        for raw_period in volume_conf["keep"]:
+            interval = Period.parse_interval(raw_period["interval"])
+            number = int(raw_period["number"])
+            periods[volume].append(
+                Period(target_number=number, interval=interval))
+    return periods
+
+def mark_snapshots(snapshots, periods):
+    """Return the set of snapshots that no period marks for keeping.
+
+    Snapshots are visited oldest first. Each period marks snapshots newer
+    than its start until its budget (target_number) is used up.
+    """
+    all_snapshots = set(snapshots)
+    marked_snapshots = set()
+    budgets = {period: period.target_number for period in periods}
+    for snapshot in sorted(snapshots, key=lambda s: s.timestamp):
+        # interval is an instance attribute, so Period.interval does not
+        # exist on the class; read it from each period instead
+        for period in sorted(periods, key=lambda p: p.interval):
+            if (budgets[period] > 0
+                    and period.get_start() < snapshot.timestamp):
+                marked_snapshots.add(snapshot)
+                budgets[period] -= 1
+    unmarked_snapshots = all_snapshots - marked_snapshots
+    return unmarked_snapshots
+
 def main():
     config = load_config()
-    print(config)
     snapshots = Snapshot.list_snapshots()
+    periods = parse_config(config)
+    for volume in set(periods.keys()) - set(snapshots.keys()):
+        print("Warning: Volume {} is configured but does not exist or has no snapshots.".format(volume))
+    for volume in set(snapshots.keys()) - set(periods.keys()):
+        print("Warning: Volume {} does exist but is not configured.".format(volume))
+        snapshots.pop(volume)
+    for volume in snapshots:
+        unmarked_snapshots = mark_snapshots(snapshots[volume], periods[volume])
+        print(unmarked_snapshots)
 
 if __name__ == "__main__":
     main()