parser.py 11.5 KB
Newer Older
Robin Sonnabend's avatar
Robin Sonnabend committed
1
import regex as re
Robin Sonnabend's avatar
Robin Sonnabend committed
2
import sys
Robin Sonnabend's avatar
Robin Sonnabend committed
3
4
5
from collections import OrderedDict

class ParserException(Exception):
    """
    Error raised when the protocol source cannot be parsed.
    Attributes:
    - message: description of what went wrong
    - linenumber: the line the error was detected at, or None if unknown
    A subclass that sets has_explanation to True has to provide an
    explanation attribute as well, it is appended to the string form.
    """
    name = "Parser Exception"
    has_explanation = False
    #explanation = "The source did generally not match the expected protocol syntax."

    def __init__(self, message, linenumber=None):
        self.message = message
        self.linenumber = linenumber

    def __str__(self):
        """
        Builds the human readable message, with the line number if known.
        """
        if self.linenumber is None:
            text = "Exception: {}".format(self.message)
        else:
            text = "Exception at line {}: {}".format(self.linenumber, self.message)
        if not self.has_explanation:
            return text
        return text + "\n" + self.explanation

Robin Sonnabend's avatar
Robin Sonnabend committed
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
class Element:
    """
    Generic (abstract) base element. Should never really exist.
    Template for what an element class should contain.
    """
    def render(self):
        """
        Renders the element to TeX.
        Returns:
        - a TeX-representation of the element
        """
        return "Generic Base Syntax Element, this is not supposed to appear."

    def dump(self, level=None):
        """
        Prints a debug representation of the element.
        Arguments:
        - level: the indentation depth, defaults to 0
        """
        if level is None:
            level = 0
        print("{}element".format(" " * level))

    def get_tags(self, tags):
        """
        Collects the tags contained in this element.
        The base implementation contributes nothing. It exists so that
        Fork.get_tags, which calls get_tags on every child, does not fail
        with an AttributeError on elements without tags (e.g. a Remark).
        Arguments:
        - tags: the list of tags collected so far
        Returns:
        - the same list, unchanged
        """
        return tags

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parses a match of this elements pattern.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. Should be a fork. May be modified.
        - linenumber: the current line number, for error messages
        Returns:
        - the new current element
        - the line number after parsing this element
        """
        raise ParserException("Trying to parse the generic base element!", linenumber)

    @staticmethod
    def parse_inner(match, current, linenumber=None):
        """
        Do the parsing for every element. Checks if the match exists.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. Should be a fork.
        - linenumber: the current line number, for error messages
        Returns:
        - new line number
        """
        if match is None:
            raise ParserException("Source does not match!", linenumber)
        # Every newline consumed by the match advances the line counter.
        length = match.group().count("\n")
        return length + (0 if linenumber is None else linenumber)

    @staticmethod
    def parse_outer(element, current):
        """
        Handle the insertion of the object into the tree.
        Arguments:
        - element: the new parsed element to insert
        - current: the current element of the parsed document
        Returns:
        - the new current element
        """
        current.append(element)
        # A fork opens a new nesting level, everything else stays on this one.
        if isinstance(element, Fork):
            return element
        return current

    PATTERN = r"x(?<!x)" # yes, a master piece, but it should never be called
Robin Sonnabend's avatar
Robin Sonnabend committed
87
88

class Content(Element):
    """
    A piece of content: a sequence of plain texts and [tags].
    Attributes:
    - children: the Text and Tag objects the content consists of
    - linenumber: the line number recorded when the content was parsed
    """
    def __init__(self, children, linenumber):
        self.children = children
        self.linenumber = linenumber

    def render(self):
        """
        Renders all children to TeX and concatenates them.
        """
        return "".join(child.render() for child in self.children)

    def dump(self, level=None):
        """
        Prints a debug representation of the content and its children.
        """
        depth = 0 if level is None else level
        print("{}content:".format(" " * depth))
        for child in self.children:
            child.dump(depth + 1)

    def get_tags(self, tags):
        """
        Appends every Tag among the children to tags and returns the list.
        """
        for child in self.children:
            if isinstance(child, Tag):
                tags.append(child)
        return tags

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parses a match of the content pattern and appends it to current.
        Content that turns out to be empty is not inserted into the tree.
        Returns:
        - the new current element
        - the line number after parsing this element
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        content = match.group("content")
        if content is None:
            raise ParserException("Content is missing its content!", linenumber)
        if not content:
            return current, linenumber
        element = Content.from_content(content, linenumber)
        return Element.parse_outer(element, current), linenumber

    @staticmethod
    def from_content(content, linenumber):
        """
        Splits a content string into its text and tag children.
        Arguments:
        - content: the raw content string
        - linenumber: the line number, for error messages and the children
        Returns:
        - a Content holding the parsed children
        """
        children = []
        remaining = content
        while remaining:
            for pattern, parse_piece in TEXT_PATTERNS.items():
                match = pattern.match(remaining)
                if match is None:
                    continue
                children.append(parse_piece(match, linenumber))
                remaining = remaining[len(match.group()):]
                break
            else:
                # None of the inner patterns matched the rest of the content.
                raise ParserException("Content does not match inner!", linenumber)
        return Content(children, linenumber)

    # v1: has problems with missing semicolons
    #PATTERN = r"\s*(?<content>(?:[^\[\];]+)?(?:\[[^\]]+\][^;\[\]]*)*);"
    # v2: does not require the semicolon, but the newline
    PATTERN = r"\s*(?<content>(?:[^\[\];\r\n]+)?(?:\[[^\]\r\n]+\][^;\[\]\r\n]*)*);?"
Robin Sonnabend's avatar
Robin Sonnabend committed
139
140

class Text:
    """
    A piece of plain text inside a content.
    Attributes:
    - text: the text itself
    - linenumber: the line number passed in by the parser
    """
    def __init__(self, text, linenumber):
        self.text = text
        self.linenumber = linenumber

    def render(self):
        """
        Renders the text to TeX, which is the text as it is.
        """
        return self.text

    def dump(self, level=None):
        """
        Prints a debug representation of the text.
        """
        depth = 0 if level is None else level
        print("{}text: {}".format(" " * depth, self.text))

    @staticmethod
    def parse(match, linenumber):
        """
        Creates a Text from a match of the text pattern.
        Arguments:
        - match: the match of the text pattern
        - linenumber: the current line number, for error messages
        Returns:
        - the parsed Text
        """
        if match is None:
            raise ParserException("Text is not actually a text!", linenumber)
        text = match.group("text")
        if text is None:
            raise ParserException("Text is empty!", linenumber)
        return Text(text, linenumber)

    PATTERN = r"(?<text>[^\[]+)(?:(?=\[)|$)"


class Tag:
    """
    A [name;value;...] tag inside a content.
    Attributes:
    - name: the part before the first semicolon
    - values: the list of the remaining semicolon separated parts
    - linenumber: the line number passed in by the parser
    """
    def __init__(self, name, values, linenumber):
        self.name = name
        self.values = values
        self.linenumber = linenumber

    def render(self):
        """
        Renders the tag to TeX: the bold name followed by the values.
        """
        joined_values = "; ".join(self.values)
        return r"\textbf{{{}:}} {}".format(self.name, joined_values)

    def dump(self, level=None):
        """
        Prints a debug representation of the tag.
        """
        depth = 0 if level is None else level
        print("{}tag: {}: {}".format(" " * depth, self.name, "; ".join(self.values)))

    @staticmethod
    def parse(match, linenumber):
        """
        Creates a Tag from a match of the tag pattern.
        Arguments:
        - match: the match of the tag pattern
        - linenumber: the current line number, for error messages
        Returns:
        - the parsed Tag
        """
        if match is None:
            raise ParserException("Tag is not actually a tag!", linenumber)
        content = match.group("content")
        if content is None:
            raise ParserException("Tag is empty!", linenumber)
        # split always yields at least one part, which is the name.
        name, *values = content.split(";")
        return Tag(name, values, linenumber)

    PATTERN = r"\[(?<content>(?:[^;\]]*;)*(?:[^;\]]*))\]"

class Empty(Element):
    """
    Whitespace between other elements.
    Its parse only advances the line number, nothing is added to the tree.
    """
    def __init__(self, linenumber):
        # Store the value on the instance. Assigning the argument to itself,
        # as was done before, left the object without a linenumber attribute.
        self.linenumber = linenumber

    def render(self):
        """
        Renders the element to TeX, which is nothing at all.
        """
        return ""

    def dump(self, level=None):
        """
        Prints a debug representation of the element.
        """
        if level is None:
            level = 0
        print("{}empty".format(" " * level))

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Consumes a match of the whitespace pattern.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document, returned unchanged
        - linenumber: the current line number, for error messages
        Returns:
        - the current element
        - the line number after the consumed whitespace
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        return current, linenumber

    PATTERN = r"\s+"

class Remark(Element):
    """
    A "#name;value" line.
    Attributes:
    - name: the part before the first semicolon
    - value: everything after the first semicolon
    - linenumber: the line number recorded when the remark was parsed
    """
    def __init__(self, name, value, linenumber):
        self.name = name
        self.value = value
        self.linenumber = linenumber

    def render(self):
        """
        Renders the remark to TeX: the bold name followed by the value.
        """
        return r"\textbf{{{}}}: {}".format(self.name, self.value)

    def dump(self, level=None):
        """
        Prints a debug representation of the remark.
        """
        depth = 0 if level is None else level
        print("{}remark: {}: {}".format(" " * depth, self.name, self.value))

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parses a match of the remark pattern and appends it to current.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. Should be a fork.
        - linenumber: the current line number, for error messages
        Returns:
        - the new current element
        - the line number after parsing this element
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        content = match.group("content")
        if content is None:
            raise ParserException("Remark is missing its content!", linenumber)
        # Only the first semicolon separates name and value.
        name, separator, value = content.partition(";")
        if not separator:
            raise ParserException("Remark value is empty!", linenumber)
        remark = Remark(name, value, linenumber)
        return Element.parse_outer(remark, current), linenumber

    PATTERN = r"\s*\#(?<content>[^\n]+)"

class Fork(Element):
    """
    An inner node of the document tree: a {...} block holding child elements.
    Attributes:
    - environment: the token directly after the opening brace, or None
    - name: the name of the fork, or None if it has none
    - parent: the enclosing fork, None for the root
    - linenumber: the line number recorded when the fork was parsed
    - children: the elements inside the fork
    """
    def __init__(self, environment, name, parent, linenumber, children=None):
        # Empty strings are normalised to None.
        self.environment = environment if environment else None
        self.name = name if name else None
        self.parent = parent
        self.linenumber = linenumber
        self.children = [] if children is None else children

    def dump(self, level=None):
        """
        Prints a debug representation of the fork and its children.
        """
        if level is None:
            level = 0
        print("{}fork: {}".format(" " * level, self.name))
        for child in self.children:
            child.dump(level + 1)

    def render(self):
        """
        Renders the fork to TeX: its name followed by an itemize
        environment with one item per child.
        """
        items = "\n".join(r"\item {}".format(child.render()) for child in self.children)
        return ((self.name or "")
            + r"\begin{itemize}" + "\n"
            + items + "\n"
            + r"\end{itemize}" + "\n")

    def get_tags(self, tags=None):
        """
        Collects the tags of all children by calling get_tags on each.
        Arguments:
        - tags: the list to collect into, a new one is created if omitted
        Returns:
        - the list of collected tags
        """
        if tags is None:
            tags = []
        for child in self.children:
            child.get_tags(tags)
        return tags

    def is_anonymous(self):
        """
        Returns whether the fork has no environment.
        """
        return self.environment is None

    def is_root(self):
        """
        Returns whether the fork has no parent.
        """
        return self.parent is None

    @staticmethod
    def create_root():
        """
        Creates the root fork of a document, starting at line 0.
        """
        return Fork(None, None, None, 0)

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parses a match of the fork pattern and opens a new fork below current.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. Should be a fork.
        - linenumber: the current line number, for error messages
        Returns:
        - the new fork, which becomes the current element
        - the line number after parsing this element
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        environment = match.group("environment")
        name1 = match.group("name1")
        name2 = match.group("name2")
        # Join only the parts that are present. Unconditionally prefixing
        # name2 with a space gave forks without name1 a leading blank.
        name = " ".join(part for part in (name1, name2) if part is not None)
        element = Fork(environment, name, current, linenumber)
        current = Element.parse_outer(element, current)
        return current, linenumber

    @staticmethod
    def parse_end(match, current, linenumber=None):
        """
        Parses a match of the end pattern and closes the current fork.
        Arguments:
        - match: the match of the end pattern
        - current: the fork to close
        - linenumber: the current line number, for error messages
        Returns:
        - the parent of the closed fork
        - the line number after parsing the end
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        if current.is_root():
            raise ParserException("Found end tag for root element!", linenumber)
        current = current.parent
        return current, linenumber

    def append(self, element):
        """
        Adds an element to the end of the children.
        """
        self.children.append(element)

    PATTERN = r"\s*(?<name1>[^{};]+)?{(?<environment>\S+)?\h*(?<name2>[^\n]+)?"
    END_PATTERN = r"\s*};?"

# Patterns of the top-level syntax elements mapped to their parse functions.
# parse() tries them in this order and uses the first one that matches.
PATTERNS = OrderedDict(
    (re.compile(element_pattern), parse_function)
    for element_pattern, parse_function in (
        (Fork.PATTERN, Fork.parse),
        (Fork.END_PATTERN, Fork.parse_end),
        (Remark.PATTERN, Remark.parse),
        (Content.PATTERN, Content.parse),
        (Empty.PATTERN, Empty.parse),
    )
)

# Patterns of the pieces inside a content, tried in order by Content.from_content.
TEXT_PATTERNS = OrderedDict(
    (re.compile(piece_pattern), parse_function)
    for piece_pattern, parse_function in (
        (Text.PATTERN, Text.parse),
        (Tag.PATTERN, Tag.parse),
    )
)

def parse(source):
    """
    Parses a protocol source into a tree of elements.
    Arguments:
    - source: the complete source as a string
    Returns:
    - the root fork of the parsed document
    Raises:
    - ParserException if no syntax element matches the remaining source
      or if the source ends while a fork is still open
    """
    linenumber = 1
    tree = Fork.create_root()
    current = tree
    while len(source) > 0:
        for pattern, parse_function in PATTERNS.items():
            match = pattern.match(source)
            if match is None:
                continue
            consumed = len(match.group())
            # Content.PATTERN can match the empty string (e.g. in front of a
            # stray "]" or an unclosed "["). Such a match consumes nothing,
            # accepting it would repeat the same step forever.
            if consumed == 0:
                continue
            source = source[consumed:]
            current, linenumber = parse_function(match, current, linenumber)
            break
        else:
            raise ParserException("No matching syntax element found!", linenumber)
    if current is not tree:
        raise ParserException("Source ended within fork! (started at line {})".format(current.linenumber))
    return tree

Robin Sonnabend's avatar
Robin Sonnabend committed
339
def main(test_file_name=None):
    """
    Parses a test source and dumps the resulting tree to stdout.
    Arguments:
    - test_file_name: name of the file in test/ without the .txt extension,
      defaults to "source0"
    Returns:
    - 0 if the source was parsed, 1 if a ParserException was raised, so that
      the value can be used as the exit status of the process
    """
    test_file_name = test_file_name or "source0"
    with open("test/{}.txt".format(test_file_name)) as f:
        source = f.read()
    try:
        tree = parse(source)
        tree.dump()
    except ParserException as e:
        print(e)
        return 1
    print("worked!")
    return 0
Robin Sonnabend's avatar
Robin Sonnabend committed
351
352
353
    

if __name__ == "__main__":
    # sys.exit instead of the bare exit(): the latter is added by the site
    # module for interactive use and is not available under "python -S".
    test_file_name = sys.argv[1] if len(sys.argv) > 1 else None
    sys.exit(main(test_file_name))