parser.py 17.4 KB
Newer Older
Robin Sonnabend's avatar
Robin Sonnabend committed
1
import regex as re
Robin Sonnabend's avatar
Robin Sonnabend committed
2
import sys
Robin Sonnabend's avatar
Robin Sonnabend committed
3
from collections import OrderedDict
Robin Sonnabend's avatar
Robin Sonnabend committed
4
from enum import Enum
Robin Sonnabend's avatar
Robin Sonnabend committed
5

6
7
from shared import escape_tex

Robin Sonnabend's avatar
Robin Sonnabend committed
8
9
import config

10
11
# Prefix character for dump() output; the dump() methods repeat it once per nesting level.
INDENT_LETTER = "-"

Robin Sonnabend's avatar
Robin Sonnabend committed
12
class ParserException(Exception):
    """
    Error raised when the protocol source cannot be parsed.
    Attributes:
    - message: description of the problem
    - linenumber: line number the problem was detected at, or None if unknown
    - tree: the tree parsed so far, if the raising code supplied one
    """
    name = "Parser Exception"
    # When this is set to True, an `explanation` attribute has to be defined as
    # well, because __str__ appends it to the message.
    has_explanation = False
    #explanation = "The source did generally not match the expected protocol syntax."

    def __init__(self, message, linenumber=None, tree=None):
        # Hand the message to Exception so that e.args and repr(e) are populated.
        super().__init__(message)
        self.message = message
        self.linenumber = linenumber
        self.tree = tree

    def __str__(self):
        """
        Returns:
        - the message, prefixed with the line number if one is known and
          followed by the explanation if the class declares one
        """
        if self.linenumber is not None:
            result = "Exception at line {}: {}".format(self.linenumber, self.message)
        else:
            result = "Exception: {}".format(self.message)
        if self.has_explanation:
            result += "\n" + self.explanation
        return result

Robin Sonnabend's avatar
Robin Sonnabend committed
31
32
33
34
35
36
37
38
class RenderType(Enum):
    """Output formats that the render() methods of the syntax elements distinguish."""
    latex = 0
    wikitext = 1
    plaintext = 2

def _not_implemented(self, render_type):
    """
    Builds the error for a render type an element cannot handle.
    The exception is returned, not raised; the caller is expected to raise it.
    Arguments:
    - self: the element that was asked to render
    - render_type: the requested render type (its name goes into the message)
    Returns:
    - a NotImplementedError naming the render type and the element class
    """
    template = "The rendertype {} has not been implemented for {}."
    element_class = self.__class__.__name__
    return NotImplementedError(template.format(render_type.name, element_class))

Robin Sonnabend's avatar
Robin Sonnabend committed
39
40
41
42
43
class Element:
    """
    Generic (abstract) base element. Should never really exist.
    Template for what an element class should contain.
    """

    def render(self, render_type, show_private, level=None, protocol=None):
        """
        Renders the element in the requested output format.
        Arguments:
        - render_type: the output format to render to
        - show_private: whether private content should be included
        - level: the nesting level of the element
        - protocol: passed through to the children that need it
        Returns:
        - a string representation of the element
        """
        return "Generic Base Syntax Element, this is not supposed to appear."

    def dump(self, level=None):
        """
        Arguments:
        - level: the nesting level, defaults to 0
        Returns:
        - a debug representation of the element, indented by level
        """
        if level is None:
            level = 0
        return "{}element".format(INDENT_LETTER * level)

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parses a match of this elements pattern.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. Should be a fork. May be modified.
        - linenumber: the current line number, for error messages
        Returns:
        - the new current element
        - the line number after parsing this element
        """
        raise ParserException("Trying to parse the generic base element!", linenumber)

    @staticmethod
    def parse_inner(match, current, linenumber=None):
        """
        Do the parsing for every element. Checks if the match exists.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. Should be a fork.
        - linenumber: the current line number, for error messages
        Returns:
        - new line number
        """
        if match is None:
            raise ParserException("Source does not match!", linenumber)
        # Every newline consumed by the match advances the line counter by one.
        consumed_lines = match.group().count("\n")
        return consumed_lines + (0 if linenumber is None else linenumber)

    @staticmethod
    def parse_outer(element, current):
        """
        Handle the insertion of the object into the tree.
        Arguments:
        - element: the new parsed element to insert
        - current: the current element of the parsed document
        Returns:
        - the new current element
        """
        current.append(element)
        if isinstance(element, Fork):
            # A fork opens a new nesting level and becomes the current element.
            return element
        # Everything else is a leaf that only remembers its containing fork.
        element.fork = current
        return current

    PATTERN = r"x(?<!x)" # yes, a master piece, but it should never be called
Robin Sonnabend's avatar
Robin Sonnabend committed
105
106

class Content(Element):
    """
    A run of content made up of text and tag children.
    Attributes:
    - children: the Text and Tag objects of this content, in source order
    - linenumber: the line number recorded when the content was parsed
    """

    def __init__(self, children, linenumber):
        self.children = children
        self.linenumber = linenumber

    def render(self, render_type, show_private, level=None, protocol=None):
        """
        Returns:
        - the concatenated renderings of all children
        """
        return "".join(
            child.render(render_type, show_private, level=level, protocol=protocol)
            for child in self.children
        )

    def dump(self, level=None):
        """
        Returns:
        - a debug representation of the content and its children, one per line
        """
        if level is None:
            level = 0
        result_lines = ["{}content:".format(INDENT_LETTER * level)]
        result_lines.extend(child.dump(level + 1) for child in self.children)
        return "\n".join(result_lines)

    def get_tags(self, tags):
        """
        Appends all tags of this content to the given list.
        Arguments:
        - tags: the list to extend. Is modified.
        Returns:
        - the same list
        """
        tags.extend(child for child in self.children if isinstance(child, Tag))
        return tags

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parses a match of the content pattern and inserts it into the tree.
        Empty content is consumed without creating an element.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. May be modified.
        - linenumber: the current line number, for error messages
        Returns:
        - the new current element
        - the line number after parsing this element
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        content = match.group("content")
        if content is None:
            raise ParserException("Content is missing its content!", linenumber)
        # Nothing to insert for empty content, so do not build an element at all.
        if len(content) == 0:
            return current, linenumber
        element = Content.from_content(content, current, linenumber)
        current = Element.parse_outer(element, current)
        return current, linenumber

    @staticmethod
    def from_content(content, current, linenumber):
        """
        Splits a content string into its text and tag parts.
        Arguments:
        - content: the string to split
        - current: the current element, handed to the part parsers
        - linenumber: the current line number, for error messages
        Returns:
        - a Content holding the parsed parts
        """
        children = []
        while len(content) > 0:
            # Try the inner patterns in order and consume the first one that matches.
            for pattern, parse_part in TEXT_PATTERNS.items():
                match = pattern.match(content)
                if match is not None:
                    children.append(parse_part(match, current, linenumber))
                    content = content[len(match.group()):]
                    break
            else:
                raise ParserException("Content does not match inner!", linenumber)
        return Content(children, linenumber)

    # v1: has problems with missing semicolons
    #PATTERN = r"\s*(?<content>(?:[^\[\];]+)?(?:\[[^\]]+\][^;\[\]]*)*);"
    # v2: does not require the semicolon, but the newline
    #PATTERN = r"\s*(?<content>(?:[^\[\];\r\n]+)?(?:\[[^\]\r\n]+\][^;\[\]\r\n]*)*);?"
    # v3: does not allow braces in the content
    PATTERN = r"\s*(?<content>(?:[^\[\];\r\n{}]+)?(?:\[[^\]\r\n]+\][^;\[\]\r\n]*)*);?"
Robin Sonnabend's avatar
Robin Sonnabend committed
160
161

class Text:
    """
    A piece of plain text inside a content.
    Attributes:
    - text: the text itself
    - linenumber: the line number recorded when the text was parsed
    - fork: the element that was current when the text was parsed
    """

    def __init__(self, text, linenumber, fork):
        self.text = text
        self.linenumber = linenumber
        self.fork = fork

    def render(self, render_type, show_private, level=None, protocol=None):
        """
        Returns:
        - the text, escaped for TeX when rendering to latex, unchanged otherwise
        Raises:
        - NotImplementedError for an unknown render type
        """
        if render_type == RenderType.latex:
            return escape_tex(self.text)
        elif render_type == RenderType.wikitext or render_type == RenderType.plaintext:
            # Both formats take the text verbatim.
            return self.text
        else:
            raise _not_implemented(self, render_type)

    def dump(self, level=None):
        """
        Returns:
        - a debug representation of the text, indented by level
        """
        if level is None:
            level = 0
        return "{}text: {}".format(INDENT_LETTER * level, self.text)

    @staticmethod
    def parse(match, current, linenumber):
        """
        Creates a Text from a match of the text pattern.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document, stored as the fork
        - linenumber: the current line number
        Returns:
        - the new Text
        """
        if match is None:
            raise ParserException("Text is not actually a text!", linenumber)
        content = match.group("text")
        if content is None:
            raise ParserException("Text is empty!", linenumber)
        return Text(content, linenumber, current)

    PATTERN = r"(?<text>[^\[]+)(?:(?=\[)|$)"


class Tag:
    """
    A tag of the form [name;value;value...] inside a content.
    Attributes:
    - name: the first part of the tag
    - values: the remaining parts of the tag
    - linenumber: the line number recorded when the tag was parsed
    - fork: the element that was current when the tag was parsed
    A `todo` attribute is read by render() for todo tags; it is not set here
    and has to be attached from outside.
    """

    def __init__(self, name, values, linenumber, fork):
        self.name = name
        self.values = values
        self.linenumber = linenumber
        self.fork = fork

    def render(self, render_type, show_private, level=None, protocol=None):
        """
        Renders the tag in the requested output format.
        Todo tags render to an empty string unless show_private is set.
        If no todo object has been attached to a todo tag, it is rendered like
        any other tag instead of failing.
        Raises:
        - NotImplementedError for an unknown render type
        """
        # May be missing if nobody attached a todo object to this tag.
        todo = getattr(self, "todo", None)
        if render_type == RenderType.latex:
            if self.name == "url":
                return r"\url{{{}}}".format(self.values[0])
            elif self.name == "todo":
                if not show_private:
                    return ""
                if todo is not None:
                    return todo.render_latex(current_protocol=protocol)
            return r"\textbf{{{}:}} {}".format(escape_tex(self.name.capitalize()), escape_tex(self.values[0]))
        elif render_type == RenderType.plaintext:
            if self.name == "url":
                return self.values[0]
            elif self.name == "todo":
                if not show_private:
                    return ""
                return self.values[0]
            return "{}: {}".format(self.name.capitalize(), self.values[0])
        elif render_type == RenderType.wikitext:
            if self.name == "url":
                return "[{0} {0}]".format(self.values[0])
            elif self.name == "todo":
                if not show_private:
                    return ""
                if todo is not None:
                    return todo.render_wikitext(current_protocol=protocol)
            return "'''{}:''' {}".format(self.name.capitalize(), self.values[0])
        else:
            raise _not_implemented(self, render_type)

    def dump(self, level=None):
        """
        Returns:
        - a debug representation of the tag, indented by level
        """
        if level is None:
            level = 0
        return "{}tag: {}: {}".format(INDENT_LETTER * level, self.name, "; ".join(self.values))

    @staticmethod
    def parse(match, current, linenumber):
        """
        Creates a Tag from a match of the tag pattern.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document, stored as the fork
        - linenumber: the current line number
        Returns:
        - the new Tag
        """
        if match is None:
            raise ParserException("Tag is not actually a tag!", linenumber)
        content = match.group("content")
        if content is None:
            raise ParserException("Tag is empty!", linenumber)
        # The first part is the tag name, everything after it are its values.
        name, *values = content.split(";")
        return Tag(name, values, linenumber, current)

    PATTERN = r"\[(?<content>(?:[^;\]]*;)*(?:[^;\]]*))\]"

class Empty(Element):
    """
    Whitespace between other elements. Is consumed by the parser, but never
    inserted into the tree.
    """

    def __init__(self, linenumber):
        # Store on the instance; a plain `linenumber = linenumber` would be lost.
        self.linenumber = linenumber

    def render(self, render_type, show_private, level=None, protocol=None):
        """
        Returns:
        - always the empty string
        """
        return ""

    def dump(self, level=None):
        """
        Returns:
        - a debug representation of the element, indented by level
        """
        if level is None:
            level = 0
        return "{}empty".format(INDENT_LETTER * level)

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Consumes the matched whitespace without changing the tree.
        Returns:
        - the unchanged current element
        - the line number after the whitespace
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        return current, linenumber

    PATTERN = r"\s+"

class Remark(Element):
    """
    A line of the form "#name;value".
    Attributes:
    - name: the part before the first semicolon
    - value: everything after the first semicolon
    - linenumber: the line number recorded when the remark was parsed
    """

    def __init__(self, name, value, linenumber):
        self.name = name
        self.value = value
        self.linenumber = linenumber

    def render(self, render_type, show_private, level=None, protocol=None):
        """
        Returns:
        - "name: value", with the name set in bold for latex
        Raises:
        - NotImplementedError for an unknown render type
        """
        if render_type == RenderType.latex:
            return r"\textbf{{{}}}: {}".format(self.name, self.value)
        elif render_type == RenderType.wikitext:
            return "{}: {}".format(self.name, self.value)
        elif render_type == RenderType.plaintext:
            return "{}: {}".format(self.name, self.value)
        else:
            # Fail loudly like the other elements instead of returning None.
            raise _not_implemented(self, render_type)

    def dump(self, level=None):
        """
        Returns:
        - a debug representation of the remark, indented by level
        """
        if level is None:
            level = 0
        return "{}remark: {}: {}".format(INDENT_LETTER * level, self.name, self.value)

    def get_tags(self, tags):
        """
        A remark contributes no tags.
        Returns:
        - the given list, unchanged
        """
        return tags

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parses a match of the remark pattern and inserts it into the tree.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. May be modified.
        - linenumber: the current line number, for error messages
        Returns:
        - the new current element
        - the line number after parsing this element
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        content = match.group("content")
        if content is None:
            raise ParserException("Remark is missing its content!", linenumber)
        # Split at the first semicolon only, the value may contain further ones.
        parts = content.split(";", 1)
        if len(parts) < 2:
            raise ParserException("Remark value is empty!", linenumber)
        name, value = parts
        element = Remark(name, value, linenumber)
        current = Element.parse_outer(element, current)
        return current, linenumber

    PATTERN = r"\s*\#(?<content>[^\n]+)"

class Fork(Element):
    """
    A nestable node of the tree that holds child elements.
    Attributes:
    - environment: the environment given after the opening brace, or None
    - name: the stripped name of the fork, or None
    - parent: the enclosing fork, None for the root
    - linenumber: the line number recorded when the fork was parsed
    - children: the elements contained in this fork
    """

    def __init__(self, environment, name, parent, linenumber, children=None):
        # Empty strings are normalized to None.
        self.environment = environment if environment is None or len(environment) > 0 else None
        self.name = name.strip() if (name is not None and len(name) > 0) else None
        self.parent = parent
        self.linenumber = linenumber
        self.children = [] if children is None else children

    def dump(self, level=None):
        """
        Returns:
        - a debug representation of the fork and its children, one per line
        """
        if level is None:
            level = 0
        result_lines = ["{}fork: {}".format(INDENT_LETTER * level, self.name)]
        result_lines.extend(child.dump(level + 1) for child in self.children)
        return "\n".join(result_lines)

    def test_private(self, name):
        """
        Arguments:
        - name: the name to test, may be None
        Returns:
        - whether the name (without colons and surrounding whitespace) is one
          of config.PRIVATE_KEYWORDS. A missing name is never private.
        """
        if name is None:
            # Unnamed forks would otherwise crash on None.replace().
            return False
        stripped_name = name.replace(":", "").strip()
        return stripped_name in config.PRIVATE_KEYWORDS

    def render(self, render_type, show_private, level, protocol=None):
        """
        Renders the fork and its children in the requested output format.
        Arguments:
        - render_type: the output format to render to
        - show_private: whether private forks and todos should be included
        - level: the nesting level of this fork, children get level + 1
        - protocol: passed through to the children
        Returns:
        - the rendered string, empty if the fork is hidden
        Raises:
        - NotImplementedError for an unknown render type
        """
        name_parts = []
        if self.environment is not None:
            name_parts.append(self.environment)
        if self.name is not None:
            name_parts.append(self.name)
        name_line = " ".join(name_parts)
        if level == 0 and self.name == "Todos" and not show_private:
            return ""
        if render_type == RenderType.latex:
            begin_line = r"\begin{itemize}"
            end_line = r"\end{itemize}"
            content_parts = []
            for child in self.children:
                part = child.render(render_type, show_private, level=level+1, protocol=protocol)
                if len(part.strip()) == 0:
                    continue
                # Every child has to be an item of the surrounding itemize.
                if not part.startswith(r"\item"):
                    part = r"\item {}".format(part)
                content_parts.append(part)
            content_lines = "\n".join(content_parts)
            if level == 0:
                return "\n".join([begin_line, content_lines, end_line])
            elif self.test_private(self.name):
                if show_private:
                    return content_lines
                else:
                    return ""
            else:
                return "\n".join([name_line, begin_line, content_lines, end_line])
        elif render_type == RenderType.wikitext:
            title_line = "{0} {1} {0}".format("=" * (level + 2), name_line)
            content_parts = []
            for child in self.children:
                part = child.render(render_type, show_private, level=level+1, protocol=protocol)
                if len(part.strip()) == 0:
                    continue
                content_parts.append(part)
            content_lines = "{}\n\n{}\n".format(title_line, "\n\n".join(content_parts))
            if self.test_private(self.name) and not show_private:
                return ""
            else:
                return content_lines
        elif render_type == RenderType.plaintext:
            title_line = "{} {}".format("#" * (level + 1), name_line)
            content_parts = []
            for child in self.children:
                part = child.render(render_type, show_private, level=level+1, protocol=protocol)
                if len(part.strip()) == 0:
                    continue
                content_parts.append(part)
            content_lines = "{}\n{}".format(title_line, "\n".join(content_parts))
            if self.test_private(self.name) and not show_private:
                return ""
            else:
                return content_lines
        else:
            raise _not_implemented(self, render_type)

    def get_tags(self, tags=None):
        """
        Collects the tags of all children.
        Arguments:
        - tags: the list to extend, a new one is created if omitted
        Returns:
        - the list of tags
        """
        if tags is None:
            tags = []
        for child in self.children:
            child.get_tags(tags)
        return tags

    def is_anonymous(self):
        """
        Returns:
        - whether the fork has no environment
        """
        return self.environment is None

    def is_root(self):
        """
        Returns:
        - whether the fork has no parent
        """
        return self.parent is None

    def get_top(self):
        """
        Returns:
        - the ancestor directly below the root (or the root itself)
        """
        if self.is_root() or self.parent.is_root():
            return self
        return self.parent.get_top()

    @staticmethod
    def create_root():
        """
        Returns:
        - a new fork without environment, name and parent, at line 0
        """
        return Fork(None, None, None, 0)

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parses the start of a fork and makes it the current element.
        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. May be modified.
        - linenumber: the current line number, for error messages
        Returns:
        - the new fork as the new current element
        - the line number after parsing this element
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        environment = match.group("environment")
        name1 = match.group("name1")
        name2 = match.group("name2")
        # The name may be given before the brace, after it, or both.
        name = " ".join(part for part in (name1, name2) if part)
        element = Fork(environment, name, current, linenumber)
        current = Element.parse_outer(element, current)
        return current, linenumber

    @staticmethod
    def parse_end(match, current, linenumber=None):
        """
        Parses the end of a fork and returns to its parent.
        Arguments:
        - match: the match of the end pattern
        - current: the fork that is being closed
        - linenumber: the current line number, for error messages
        Returns:
        - the parent of the closed fork as the new current element
        - the line number after parsing this element
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        if current.is_root():
            raise ParserException("Found end tag for root element!", linenumber)
        current = current.parent
        return current, linenumber

    def append(self, element):
        """
        Adds an element to the children of this fork.
        """
        self.children.append(element)

    # v1: has a problem with old protocols that do not use a lot of semicolons
    #PATTERN = r"\s*(?<name1>[^{};]+)?{(?<environment>\S+)?\h*(?<name2>[^\n]+)?"
    # v2: do not allow newlines in name1 or semicolons in name2
    PATTERN = r"\s*(?<name1>[^{};\n]+)?{(?<environment>\S+)?\h*(?<name2>[^;\n]+)?"
    END_PATTERN = r"\s*};?"

# Top-level syntax: compiled pattern -> parse function.
# parse() tries these in the order given here, so the order matters.
PATTERNS = OrderedDict(
    (re.compile(pattern_source), parse_function)
    for pattern_source, parse_function in (
        (Fork.PATTERN, Fork.parse),
        (Fork.END_PATTERN, Fork.parse_end),
        (Remark.PATTERN, Remark.parse),
        (Content.PATTERN, Content.parse),
        (Empty.PATTERN, Empty.parse),
    )
)

# Syntax inside a content: compiled pattern -> parse function.
# Content.from_content() tries these in the order given here.
TEXT_PATTERNS = OrderedDict(
    (re.compile(pattern_source), parse_function)
    for pattern_source, parse_function in (
        (Text.PATTERN, Text.parse),
        (Tag.PATTERN, Tag.parse),
    )
)

def parse(source):
    """
    Parses a protocol source into a tree of elements.
    Arguments:
    - source: the source text to parse
    Returns:
    - the root fork of the parsed tree
    Raises:
    - ParserException if no pattern consumes the remaining source, or if the
      source ends while a fork is still open
    """
    linenumber = 1
    tree = Fork.create_root()
    current = tree
    while len(source) > 0:
        for pattern, parse_function in PATTERNS.items():
            match = pattern.match(source)
            # A match that consumes nothing makes no progress: accepting it
            # would leave the source unchanged and loop forever.
            if match is None or len(match.group()) == 0:
                continue
            source = source[len(match.group()):]
            current, linenumber = parse_function(match, current, linenumber)
            break
        else:
            raise ParserException("No matching syntax element found!", linenumber)
    if current is not tree:
        raise ParserException("Source ended within fork! (started at line {})".format(current.linenumber), linenumber=current.linenumber, tree=tree)
    return tree

Robin Sonnabend's avatar
Robin Sonnabend committed
472
def main(test_file_name=None):
    """
    Parses test/<test_file_name>.txt and prints the result.
    On success the dump of the parsed tree is printed, followed by "worked!";
    on a ParserException the exception is printed instead.
    Arguments:
    - test_file_name: name of the test file without directory and extension,
      defaults to "source0"
    """
    test_file_name = test_file_name or "source0"
    with open("test/{}.txt".format(test_file_name)) as f:
        source = f.read()
    try:
        tree = parse(source)
        # dump() only builds the string, it has to be printed to be of any use.
        print(tree.dump())
    except ParserException as e:
        print(e)
    else:
        print("worked!")
Robin Sonnabend's avatar
Robin Sonnabend committed
484
485
486
    

if __name__ == "__main__":
    # The optional first command line argument selects the test file.
    test_file_name = sys.argv[1] if len(sys.argv) > 1 else None
    # sys.exit is always available; the bare exit() is only added by the site module.
    sys.exit(main(test_file_name))