parser.py 12.7 KB
Newer Older
Robin Sonnabend's avatar
Robin Sonnabend committed
1
import regex as re
Robin Sonnabend's avatar
Robin Sonnabend committed
2
import sys
Robin Sonnabend's avatar
Robin Sonnabend committed
3
4
from collections import OrderedDict

5
6
from shared import escape_tex

Robin Sonnabend's avatar
Robin Sonnabend committed
7
8
import config

Robin Sonnabend's avatar
Robin Sonnabend committed
9
class ParserException(Exception):
    """
    Error raised when protocol source cannot be parsed.
    Stores the error message and, when known, the line number it refers to.
    """
    name = "Parser Exception"
    # When set to True, __str__ appends self.explanation, which then has to
    # be defined (the default class does not define it).
    has_explanation = False

    def __init__(self, message, linenumber=None):
        self.linenumber = linenumber
        self.message = message

    def __str__(self):
        """
        Build the readable message, prefixed with the line number if known.
        """
        if self.linenumber is None:
            text = "Exception: {}".format(self.message)
        else:
            text = "Exception at line {}: {}".format(self.linenumber, self.message)
        if not self.has_explanation:
            return text
        return text + "\n" + self.explanation

Robin Sonnabend's avatar
Robin Sonnabend committed
27
28
29
30
31
class Element:
    """
    Abstract template for syntax elements; not meant to be instantiated.
    Shows which methods a concrete element class is expected to provide.
    """
    def render(self, show_private):
        """
        Produce the TeX output for this element.
        Returns:
        - the TeX source as a string (only a placeholder for the base class)
        """
        return "Generic Base Syntax Element, this is not supposed to appear."

    def dump(self, level=None):
        """
        Print a debug line for this element, indented by one space per level.
        """
        indent = " " * (0 if level is None else level)
        print("{}element".format(indent))

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Turn a match of this element's pattern into an element.
        Arguments:
        - match: the match of this element's pattern
        - current: the current element of the document. Should be a fork. May be modified.
        - linenumber: the current line number, for error messages
        Returns:
        - the new current element
        - the line number after parsing this element
        The base implementation always raises a ParserException.
        """
        raise ParserException("Trying to parse the generic base element!", linenumber)

    @staticmethod
    def parse_inner(match, current, linenumber=None):
        """
        Shared first parsing step: make sure there is a match and advance
        the line counter by the number of newlines it covers.
        Arguments:
        - match: the match of this element's pattern
        - current: the current element of the document. Should be a fork.
        - linenumber: the current line number, for error messages
        Returns:
        - the new line number
        """
        if match is None:
            raise ParserException("Source does not match!", linenumber)
        start = linenumber if linenumber is not None else 0
        return start + match.group().count("\n")

    @staticmethod
    def parse_outer(element, current):
        """
        Shared last parsing step: hang the new element into the tree.
        Arguments:
        - element: the freshly parsed element to insert
        - current: the current element of the parsed document
        Returns:
        - the new current element (the inserted element itself if it is a fork)
        """
        current.append(element)
        return element if isinstance(element, Fork) else current

    # "x" that is not preceded by "x" right after consuming it: can never match.
    PATTERN = r"x(?<!x)"
Robin Sonnabend's avatar
Robin Sonnabend committed
91
92

class Content(Element):
    """
    One content line, stored as an ordered list of inline parts
    (the objects produced by the TEXT_PATTERNS handlers).
    """
    def __init__(self, children, linenumber):
        self.linenumber = linenumber
        self.children = children

    def render(self, show_private):
        """
        Concatenate the TeX output of all inline parts.
        """
        return "".join(child.render(show_private) for child in self.children)

    def dump(self, level=None):
        """
        Print this element and its parts, one indentation level deeper each.
        """
        depth = 0 if level is None else level
        print("{}content:".format(" " * depth))
        for child in self.children:
            child.dump(depth + 1)

    def get_tags(self, tags):
        """
        Append every Tag among the direct children to tags and return the list.
        """
        for child in self.children:
            if isinstance(child, Tag):
                tags.append(child)
        return tags

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parse a content line and insert it into the tree.
        A line whose content is the empty string is consumed without
        inserting anything.
        Returns:
        - the new current element
        - the line number after parsing this element
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        content = match.group("content")
        if content is None:
            raise ParserException("Content is missing its content!", linenumber)
        if len(content) == 0:
            return current, linenumber
        element = Content.from_content(content, linenumber)
        return Element.parse_outer(element, current), linenumber

    @staticmethod
    def from_content(content, linenumber):
        """
        Split a content string into inline parts using TEXT_PATTERNS.
        The patterns are tried in order at the start of the remaining string;
        the first one that matches consumes its match.
        Raises a ParserException if none of them matches.
        """
        children = []
        remaining = content
        while remaining:
            for pattern, handler in TEXT_PATTERNS.items():
                match = pattern.match(remaining)
                if match is None:
                    continue
                children.append(handler(match, linenumber))
                remaining = remaining[len(match.group()):]
                break
            else:
                raise ParserException("Content does not match inner!", linenumber)
        return Content(children, linenumber)

    # v1: has problems with missing semicolons
    #PATTERN = r"\s*(?<content>(?:[^\[\];]+)?(?:\[[^\]]+\][^;\[\]]*)*);"
    # v2: does not require the semicolon, but the newline
    PATTERN = r"\s*(?<content>(?:[^\[\];\r\n]+)?(?:\[[^\]\r\n]+\][^;\[\]\r\n]*)*);?"
Robin Sonnabend's avatar
Robin Sonnabend committed
143
144

class Text:
    """
    A run of plain text inside a content line.
    """
    def __init__(self, text, linenumber):
        self.linenumber = linenumber
        self.text = text

    def render(self, show_private):
        """
        Return the text with TeX special characters escaped.
        """
        return escape_tex(self.text)

    def dump(self, level=None):
        """
        Print a debug line for this text, indented by one space per level.
        """
        indent = " " * (level if level is not None else 0)
        print("{}text: {}".format(indent, self.text))

    @staticmethod
    def parse(match, linenumber):
        """
        Build a Text from a match of PATTERN.
        Arguments:
        - match: the match of the text pattern
        - linenumber: the current line number, for error messages
        Returns:
        - the new Text object
        """
        if match is None:
            raise ParserException("Text is not actually a text!", linenumber)
        text = match.group("text")
        if text is None:
            raise ParserException("Text is empty!", linenumber)
        return Text(text, linenumber)

    # Everything up to the next "[" or, if there is none, up to the end.
    PATTERN = r"(?<text>[^\[]+)(?:(?=\[)|$)"


class Tag:
    """
    A bracketed tag inside a content line, written as "[name;value;...]".
    The first semicolon-separated part is the name, the rest are the values.
    """
    def __init__(self, name, values, linenumber):
        self.name = name
        self.values = values
        self.linenumber = linenumber

    def render(self, show_private):
        """
        Render the tag to TeX. Only the first value is rendered.
        Returns:
        - "\\url{...}" for tags named "url"
        - the capitalized name in bold followed by the value otherwise
        """
        # A tag written without values (e.g. "[name]") has an empty values
        # list; render an empty value instead of failing with an IndexError.
        value = self.values[0] if self.values else ""
        if self.name == "url":
            return r"\url{{{}}}".format(value)
        return r"\textbf{{{}:}} {}".format(escape_tex(self.name.capitalize()), escape_tex(value))

    def dump(self, level=None):
        """
        Print a debug line with the name and all values of this tag.
        """
        if level is None:
            level = 0
        print("{}tag: {}: {}".format(" " * level, self.name, "; ".join(self.values)))

    @staticmethod
    def parse(match, linenumber):
        """
        Build a Tag from a match of PATTERN.
        Arguments:
        - match: the match of the tag pattern
        - linenumber: the current line number, for error messages
        Returns:
        - the new Tag object
        """
        if match is None:
            raise ParserException("Tag is not actually a tag!", linenumber)
        content = match.group("content")
        if content is None:
            raise ParserException("Tag is empty!", linenumber)
        parts = content.split(";")
        return Tag(parts[0], parts[1:], linenumber)

    PATTERN = r"\[(?<content>(?:[^;\]]*;)*(?:[^;\]]*))\]"

class Empty(Element):
    """
    A stretch of whitespace between other elements. It is consumed by the
    parser but never inserted into the tree.
    """
    def __init__(self, linenumber):
        # Store the line number on the instance; the former
        # "linenumber = linenumber" only rebound the local name.
        self.linenumber = linenumber

    def render(self, show_private):
        """
        Whitespace produces no TeX output.
        """
        return ""

    def dump(self, level=None):
        """
        Print a debug line for this element, indented by one space per level.
        """
        if level is None:
            level = 0
        print("{}empty".format(" " * level))

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Consume the whitespace and advance the line number.
        Returns:
        - the unchanged current element
        - the line number after the whitespace
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        return current, linenumber

    PATTERN = r"\s+"

class Remark(Element):
    """
    A remark line of the form "#name;value".
    """
    def __init__(self, name, value, linenumber):
        self.name = name
        self.value = value
        self.linenumber = linenumber

    def render(self, show_private):
        """
        Render the remark to TeX as the bold name followed by the value.
        """
        # Escape both parts like Text and Tag do, so that characters such as
        # "%", "&" or "_" in a remark do not break the generated TeX.
        return r"\textbf{{{}}}: {}".format(escape_tex(self.name), escape_tex(self.value))

    def dump(self, level=None):
        """
        Print a debug line for this remark, indented by one space per level.
        """
        if level is None:
            level = 0
        print("{}remark: {}: {}".format(" " * level, self.name, self.value))

    def get_tags(self, tags):
        """
        Remarks contain no tags; return the list unchanged.
        """
        return tags

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parse a remark line and insert it into the tree.
        The content is split at the first semicolon into name and value.
        Returns:
        - the new current element
        - the line number after parsing this element
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        if match.group("content") is None:
            raise ParserException("Remark is missing its content!", linenumber)
        content = match.group("content")
        parts = content.split(";", 1)
        if len(parts) < 2:
            raise ParserException("Remark value is empty!", linenumber)
        name, value = parts
        element = Remark(name, value, linenumber)
        current = Element.parse_outer(element, current)
        return current, linenumber

    PATTERN = r"\s*\#(?<content>[^\n]+)"

class Fork(Element):
    """
    A nested block of the document that holds child elements.
    The root of a parsed document is a Fork without a parent.
    """
    def __init__(self, environment, name, parent, linenumber, children=None):
        # An empty environment or name is stored as None.
        self.environment = environment if environment is None or len(environment) > 0 else None
        self.name = name.strip() if (name is not None and len(name) > 0) else None
        self.parent = parent
        self.linenumber = linenumber
        self.children = [] if children is None else children

    def dump(self, level=None):
        """
        Print this fork and all its children, one indentation level deeper each.
        """
        if level is None:
            level = 0
        print("{}fork: {}".format(" " * level, self.name))
        for child in self.children:
            child.dump(level + 1)

    def test_private(self, name):
        """
        Check whether a fork name is listed in config.PRIVATE_KEYS.
        Arguments:
        - name: the fork name; may be None for unnamed forks
        Returns:
        - True if the name, without colons and surrounding whitespace, is a private key
        """
        if name is None:
            # Unnamed forks cannot be private; without this guard render()
            # failed with an AttributeError for nested forks without a name.
            return False
        stripped_name = name.replace(":", "").strip()
        return stripped_name in config.PRIVATE_KEYS

    def render(self, show_private, toplevel=False):
        """
        Render the fork and its children to a TeX itemize environment.
        Arguments:
        - show_private: whether the content of private forks is included
        - toplevel: if True, only the itemize environment is emitted, without the name line
        Returns:
        - the TeX source; for a private fork either its bare items or ""
        """
        # NOTE(review): the name is emitted without escape_tex - confirm intended.
        name_line = self.name if self.name is not None and len(self.name) > 0 else ""
        begin_line = r"\begin{itemize}"
        end_line = r"\end{itemize}"
        content_parts = []
        for child in self.children:
            part = child.render(show_private)
            if len(part.strip()) == 0:
                # children without output do not get an item of their own
                continue
            if not part.startswith(r"\item"):
                part = r"\item {}".format(part)
            content_parts.append(part)
        content_lines = "\n".join(content_parts)
        if toplevel:
            return "\n".join([begin_line, content_lines, end_line])
        elif self.test_private(self.name):
            if show_private:
                return content_lines
            else:
                return ""
        else:
            return "\n".join([name_line, begin_line, content_lines, end_line])

    def get_tags(self, tags=None):
        """
        Collect the tags of all children into tags (a new list if omitted).
        """
        if tags is None:
            tags = []
        for child in self.children:
            child.get_tags(tags)
        return tags

    def is_anonymous(self):
        """
        Return True if the fork has no environment.
        """
        return self.environment is None

    def is_root(self):
        """
        Return True if the fork has no parent.
        """
        return self.parent is None

    @staticmethod
    def create_root():
        """
        Create the root fork: no environment, name or parent, line number 0.
        """
        return Fork(None, None, None, 0)

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parse the opening of a fork and insert the new fork into the tree.
        The name is built from the text before the brace and the text after
        the environment, joined by a space.
        Returns:
        - the new fork as the new current element
        - the line number after parsing the opening
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        environment = match.group("environment")
        name1 = match.group("name1")
        name2 = match.group("name2")
        name = ""
        if name1 is not None:
            name = name1
        if name2 is not None:
            name += " {}".format(name2)
        element = Fork(environment, name, current, linenumber)
        current = Element.parse_outer(element, current)
        return current, linenumber

    @staticmethod
    def parse_end(match, current, linenumber=None):
        """
        Parse the closing brace of a fork and step back to its parent.
        Raises a ParserException if the current element is the root.
        Returns:
        - the parent as the new current element
        - the line number after parsing the closing brace
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        if current.is_root():
            raise ParserException("Found end tag for root element!", linenumber)
        current = current.parent
        return current, linenumber

    def append(self, element):
        """
        Add an element as the last child of this fork.
        """
        self.children.append(element)

    PATTERN = r"\s*(?<name1>[^{};]+)?{(?<environment>\S+)?\h*(?<name2>[^\n]+)?"
    END_PATTERN = r"\s*};?"

# Compiled top-level patterns mapped to their parse handlers, in priority order.
PATTERNS = OrderedDict(
    (re.compile(element_pattern), handler)
    for element_pattern, handler in (
        (Fork.PATTERN, Fork.parse),
        (Fork.END_PATTERN, Fork.parse_end),
        (Remark.PATTERN, Remark.parse),
        (Content.PATTERN, Content.parse),
        (Empty.PATTERN, Empty.parse),
    )
)

# Compiled inline patterns mapped to their parse handlers, in priority order.
TEXT_PATTERNS = OrderedDict(
    (re.compile(inline_pattern), handler)
    for inline_pattern, handler in (
        (Text.PATTERN, Text.parse),
        (Tag.PATTERN, Tag.parse),
    )
)

def parse(source):
    """
    Parse protocol source text into an element tree.
    Arguments:
    - source: the complete source text
    Returns:
    - the root fork of the parsed tree
    Raises:
    - ParserException if no pattern consumes input at the current position,
      or if the source ends inside a fork that was never closed
    """
    linenumber = 1
    tree = Fork.create_root()
    current = tree
    while len(source) > 0:
        for pattern, handler in PATTERNS.items():
            match = pattern.match(source)
            # A zero-length match consumes nothing; accepting it would leave
            # source unchanged and loop forever, so treat it as no match.
            if match is None or match.end() == 0:
                continue
            source = source[match.end():]
            current, linenumber = handler(match, current, linenumber)
            break
        else:
            raise ParserException("No matching syntax element found!", linenumber)
    if current is not tree:
        raise ParserException("Source ended within fork! (started at line {})".format(current.linenumber))
    return tree

Robin Sonnabend's avatar
Robin Sonnabend committed
370
def main(test_file_name=None):
    """
    Parse a test file and dump the resulting tree to stdout.
    Arguments:
    - test_file_name: base name of the file in "test/" (without ".txt"),
      defaults to "source0"
    Returns:
    - 0 if the file was parsed, 1 if a ParserException was raised
    """
    test_file_name = test_file_name or "source0"
    with open("test/{}.txt".format(test_file_name)) as f:
        source = f.read()
    try:
        tree = parse(source)
        tree.dump()
    except ParserException as e:
        print(e)
        # Report the failure to the caller, which uses it as exit status.
        return 1
    print("worked!")
    return 0
Robin Sonnabend's avatar
Robin Sonnabend committed
382
383
384
    

if __name__ == "__main__":
    # Use sys.exit: the bare exit() helper is installed by the site module
    # for interactive use and is not guaranteed to exist in every run.
    test_file_name = sys.argv[1] if len(sys.argv) > 1 else None
    sys.exit(main(test_file_name))