partition-cleanup.py 2.32 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python
# encoding: utf-8
from __future__ import print_function
import psycopg2
import re
from datetime import datetime, date, timedelta

DATE_PATTERN = r'(?P<date>\d{6})'

def get_date(name):
    match = re.search(DATE_PATTERN, name)
    date_str = match.group("date")
    year_part, month_part = date_str[:4], date_str[4:]
    return date(year=int(year_part), month=int(month_part), day=1)

def get_partitions(connection, tablename):
    cursor = connection.cursor()
    cursor.execute("SELECT relname FROM pg_inherits i "
        "JOIN pg_class c on c.oid = inhrelid "
        "WHERE inhparent = '{}'::regclass".format(tablename))
    return cursor.fetchall()

def delete_partition(connection, partition_name, partitions_schema="partitions"):
    cursor = connection.cursor()
    cursor.execute("DROP TABLE {}.{}".format(partitions_schema, partition_name))
    return connection.commit()

if __name__ == "__main__":
    today = datetime.now().date()
    import argparse
    parser = argparse.ArgumentParser("Helper script to keep the zabbix "
        "database s̶m̶a̶l̶l̶ not tooo large.")
    parser.add_argument("table", help="Tablename to remove partitions from "
        "(e.g. „public.history“).")
    parser.add_argument("--db", default="zabbix",
        help="Name of the database to operate on.")
    parser.add_argument("--months", default=3, type=int,
        help="Number of months after which partitions should be dropped.")
    parser.add_argument("--dryrun", action="store_true", default=False,
        help="Do not actually delete partitions.")
    parser.add_argument("--partitions-schema", default="partitions",
        help="Schema name of partitions.")
    arguments = parser.parse_args()

    min_drop_delta = timedelta(days=31*arguments.months)

    connection = psycopg2.connect(dbname=arguments.db)
    for partitions in get_partitions(connection, arguments.table):
        partition_name, = partitions
        partition_date = get_date(partition_name)
        should_delete = today - partition_date > min_drop_delta
        if not should_delete:
            continue
        if arguments.dryrun:
            print("Would drop partition {}.".format(partition_name))
        else:
            delete_partition(connection, partition_name,
                partitions_schema=arguments.partitions_schema)
    connection.close()