legacy.py 5.37 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from models.database import Todo, OldTodo
from fuzzywuzzy import fuzz, process

from shared import db

import config

def lookup_todo_id(old_candidates, new_who, new_description):
    # Check for perfect matches
    for candidate in old_candidates:
        if candidate.who == new_who and candidate.description == new_description:
            return candidate.old_id
    # Accept if who has been changed
    for candidate in old_candidates:
        if candidate.description == new_description:
            return candidate.old_id
    # Do fuzzy matching on description
    content_to_number = {
        candidate.description: candidate.old_id
        for candidate in old_candidates
    }
    best_match, best_match_score = process.extractOne(
        new_description, content_to_number.keys())
    if best_match_score >= config.FUZZY_MIN_SCORE:
        print("Used fuzzy matching on '{}', got '{}' with score {}.".format(
            new_description, best_match, best_match_score))
        return content_to_number[best_match]
    else:
        print("Best match for '{}' is '{}' with score {}, rejecting.".format(
            new_description, best_match, best_match_score))
        return None


def import_old_todos(sql_text):
    protocoltype_lines = []
    protocol_lines = []
    todo_lines = []
    for line in sql_text.splitlines():
        if line.startswith("INSERT INTO `protocolManager_protocoltype`"):
            protocoltype_lines.append(line)
        elif line.startswith("INSERT INTO `protocolManager_protocol`"):
            protocol_lines.append(line)
        elif line.startswith("INSERT INTO `protocolManager_todo`"):
            todo_lines.append(line)
    if (len(protocoltype_lines) == 0
    or len(protocol_lines) == 0
    or len(todo_lines) == 0):
        raise ValueError("Necessary lines not found.")
    type_id_to_handle = {}
    for type_line in protocoltype_lines:
        for id, handle, name, mail, protocol_id in _split_insert_line(type_line):
            type_id_to_handle[int(id)] = handle.lower()
    protocol_id_to_key = {}
    for protocol_line in protocol_lines:
        for (protocol_id, type_id, date, source, textsummary, htmlsummary,
            deleted, sent, document_id) in _split_insert_line(protocol_line):
            handle = type_id_to_handle[int(type_id)]
            date_string = date [2:]
            protocol_id_to_key[int(protocol_id)] = "{}-{}".format(handle, date_string)
    todos = []
    for todo_line in todo_lines:
        for old_id, protocol_id, who, what, start_time, end_time, done in _split_insert_line(todo_line):
            protocol_id = int(protocol_id)
            if protocol_id not in protocol_id_to_key:
                print("Missing protocol with ID {} for Todo {}".format(protocol_id, what))
                continue
            todo = OldTodo(old_id=old_id, who=who, description=what,
                protocol_key=protocol_id_to_key[protocol_id])
            todos.append(todo)
    OldTodo.query.delete()
    db.session.commit()
    for todo in todos:
        db.session.add(todo)
    db.session.commit()
        
def _split_insert_line(line):
    insert_part, values_part = line.split("VALUES", 1)
    return _split_base_level(values_part)

def _split_base_level(text, begin="(", end=")", separator=",", string_terminator="'", line_end=";", ignore=" ", escape="\\"):
    raw_parts = []
    current_part = None
    index = 0
    in_string = False
    escaped = False
    for char in text:
        if escaped:
            current_part += char
            escaped = False
        elif current_part is None:
            if char == ignore:
                continue
            elif char == begin:
                current_part = ""
            elif char == line_end:
                break
            elif char == separator:
                pass
            else:
                raise ValueError(
                    "Found invalid char '{}' at position {}".format(
                        char, index))
        else:
            if in_string:
                current_part += char
                if char == escape:
                    escaped = True
                elif char == string_terminator:
                    in_string = False
            else:
                if char == string_terminator:
                    current_part += char
                    in_string = True
                elif char == end:
                    raw_parts.append(current_part)
                    current_part = None
                else:
                    current_part += char
        index += 1
    parts = []
    for part in raw_parts:
        fields = []
        current_field = ""
        in_string = False
        escaped = False
        for char in part:
            if escaped:
                current_field += char
                escaped = False
            elif in_string:
                if char == escape:
                    escaped = True
                elif char == string_terminator:
                    in_string = False
                else:
                    current_field += char
            else:
                if char == string_terminator:
                    in_string = True
                elif char == separator:
                    fields.append(current_field)
                    current_field = ""
                else:
                    current_field += char
        if len(current_field) > 0:
            fields.append(current_field)
        parts.append(fields)
    return parts