Skip to content
Snippets Groups Projects
Select Git revision
  • c881adf3677c5d8d93235ec0b010d00bdbe567ea
  • master default protected
  • md-export
  • th/mail
  • 179-einladungen-zum-aushaengen-drucken
5 results

protoparser.py

Blame
  • Robin Sonnabend's avatar
    Robin Sonnabend authored
    Also fixes some latent, rather unimportant bugs
    6d9c3fdd
    History
    Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    protoparser.py 23.76 KiB
    import regex as re
    import sys
    from collections import OrderedDict
    from enum import Enum
    
    from shared import escape_tex
    from utils import footnote_hash
    
    import config
    
    # Character repeated once per nesting level to indent the lines of dump().
    INDENT_LETTER = "-"
    
    
    class ParserException(Exception):
        """
        Error raised while parsing a protocol source.

        Attributes:
        - message: description of the problem
        - linenumber: line the problem was found at, or None if unknown
        - tree: element tree attached to the error, or None

        When has_explanation is True, an explanation attribute is expected to
        exist; it is appended to the string representation on its own line.
        """
        name = "Parser Exception"
        has_explanation = False

        def __init__(self, message, linenumber=None, tree=None):
            self.message = message
            self.linenumber = linenumber
            self.tree = tree

        def __str__(self):
            """
            Formats the message, prefixed with the line number if one is known.
            """
            if self.linenumber is None:
                text = "Exception: {}".format(self.message)
            else:
                text = "Exception at line {}: {}".format(
                    self.linenumber, self.message)
            if not self.has_explanation:
                return text
            return text + "\n" + self.explanation
    
    
    class RenderType(Enum):
        """Output formats that the render() methods of the elements handle."""
        latex = 0
        wikitext = 1
        plaintext = 2
        html = 3
        dokuwiki = 4
    
    
    def _not_implemented(self, render_type):
        return NotImplementedError(
            "The rendertype {} has not been implemented for {}.".format(
                render_type.name, self.__class__.__name__))
    
    
    class Element:
        """
        Abstract base of all syntax elements; not meant to appear in a tree.
        Shows which methods an element class is expected to offer and holds
        the parsing helpers shared by the concrete elements.
        """
        def render(self, render_type, show_private, level=None, protocol=None):
            """
            Renders the element in the requested output format.
            Returns:
            - the rendered text; the base class only returns a placeholder
            """
            return "Generic Base Syntax Element, this is not supposed to appear."

        def dump(self, level=None):
            """
            Returns a debug line for this element, indented by level.
            A level of None is treated as 0.
            """
            depth = 0 if level is None else level
            return "{}element".format(INDENT_LETTER * depth)

        @staticmethod
        def parse(match, current, linenumber=None):
            """
            Parses a match of this element's pattern.
            Arguments:
            - match: the match of this element's pattern
            - current: the current element of the document. Should be a fork.
                May be modified.
            - linenumber: the current line number, for error messages
            Returns (in subclasses):
            - the new current element
            - the line number after parsing this element
            Raises:
            - ParserException: always, the base element cannot be parsed
            """
            raise ParserException(
                "Trying to parse the generic base element!", linenumber)

        @staticmethod
        def parse_inner(match, current, linenumber=None):
            """
            Common first step of parsing: checks that the match exists and
            advances the line number by the newlines the match consumed.
            Arguments:
            - match: the match of the element's pattern
            - current: the current element of the document (unused here)
            - linenumber: the current line number, None counts as 0
            Returns:
            - the new line number
            Raises:
            - ParserException: if match is None
            """
            if match is None:
                raise ParserException("Source does not match!", linenumber)
            start = linenumber if linenumber is not None else 0
            consumed_newlines = match.group().count("\n")
            return consumed_newlines + start

        @staticmethod
        def parse_outer(element, current):
            """
            Inserts a freshly parsed element into the tree.
            Arguments:
            - element: the new parsed element to insert
            - current: the current element of the parsed document
            Returns:
            - the new current element: the element itself if it is a fork
              (following elements go into it), otherwise the unchanged current
            """
            current.append(element)
            if not isinstance(element, Fork):
                # Non-fork elements remember the fork they were put into.
                element.fork = current
                return current
            return element

        # Can never match: requires an "x" that is not preceded by an "x"
        # when looking back from right behind it.
        PATTERN = r"x(?<!x)"
    
    
    class Content(Element):
        """
        A piece of content made up of child elements, which are produced by
        the parsers registered in TEXT_PATTERNS.
        """
        def __init__(self, children, linenumber):
            self.children = children
            self.linenumber = linenumber

        def render(self, render_type, show_private, level=None, protocol=None):
            """
            Renders all children with the given arguments and concatenates the
            results without a separator.
            """
            rendered = [
                child.render(
                    render_type, show_private, level=level, protocol=protocol)
                for child in self.children
            ]
            return "".join(rendered)

        def dump(self, level=None):
            """
            Returns a debug representation: a header line followed by the
            dumps of the children, one level deeper.
            """
            depth = 0 if level is None else level
            lines = ["{}content:".format(INDENT_LETTER * depth)]
            lines.extend(child.dump(depth + 1) for child in self.children)
            return "\n".join(lines)

        def get_tags(self, tags):
            """
            Appends every child that is a Tag to the given list.
            Returns:
            - the same list that was passed in
            """
            found = [child for child in self.children if isinstance(child, Tag)]
            tags.extend(found)
            return tags

        @staticmethod
        def parse(match, current, linenumber=None):
            """
            Parses a match of the content pattern and inserts the resulting
            Content into the current fork, unless the content is empty.
            Returns:
            - the new current element
            - the line number after parsing this element
            Raises:
            - ParserException: if the match is None, has no content group or
              the content cannot be split into texts and tags
            """
            linenumber = Element.parse_inner(match, current, linenumber)
            content = match.group("content")
            if content is None:
                raise ParserException(
                    "Content is missing its content!", linenumber)
            element = Content.from_content(content, current, linenumber)
            if len(content) == 0:
                return current, linenumber
            return Element.parse_outer(element, current), linenumber

        @staticmethod
        def from_content(content, current, linenumber):
            """
            Splits a content string into children by repeatedly matching the
            TEXT_PATTERNS (in their order) at the start of the remaining text.
            Raises:
            - ParserException: if none of the patterns matches the remainder
            """
            children = []
            remaining = content
            while len(remaining) > 0:
                for pattern, parser in TEXT_PATTERNS.items():
                    match = pattern.match(remaining)
                    if match is None:
                        continue
                    children.append(parser(match, current, linenumber))
                    remaining = remaining[len(match.group()):]
                    break
                else:
                    # No pattern matched the start of the remaining text.
                    raise ParserException(
                        "Dies ist kein valider Tag! "
                        "(mögliche Tags sind: {})".format(
                            ", ".join(Tag.KNOWN_TAGS)),
                        linenumber)
            return Content(children, linenumber)

        PATTERN = (
            r"\s*(?<content>(?:(?:[^\[\];\r\n{}]+)|(?:[^\[\];\r\n{}]+)?"
            r"(?:\[[^\]\r\n{}]+\][^;\[\]\r\n{}]*)+));?")
    
    
    class Text:
        """
        A run of plain text.
        Attributes:
        - text: the text itself
        - linenumber: the line number passed in by the parser
        - fork: the element passed as current when the text was parsed
        """
        def __init__(self, text, linenumber, fork):
            self.text = text
            self.linenumber = linenumber
            self.fork = fork

        def render(self, render_type, show_private, level=None, protocol=None):
            """
            Returns the text, escaped with escape_tex for latex and unchanged
            for wikitext, plaintext, html and dokuwiki.
            Raises:
            - NotImplementedError: for any other render type
            """
            if render_type == RenderType.latex:
                return escape_tex(self.text)
            if render_type == RenderType.wikitext:
                return self.text
            if render_type == RenderType.plaintext:
                return self.text
            if render_type == RenderType.html:
                return self.text
            if render_type == RenderType.dokuwiki:
                return self.text
            raise _not_implemented(self, render_type)

        def dump(self, level=None):
            """
            Returns a debug line for this text, indented by level.
            A level of None is treated as 0.
            """
            depth = 0 if level is None else level
            prefix = INDENT_LETTER * depth
            return "{}text: {}".format(prefix, self.text)

        @staticmethod
        def parse(match, current, linenumber):
            """
            Creates a Text from the "text" group of a match.
            Arguments:
            - match: the match of the text pattern
            - current: the current element, stored as the fork of the text
            - linenumber: the current line number
            Raises:
            - ParserException: if match is None or its "text" group is None
            """
            if match is None:
                raise ParserException("Text is not actually a text!", linenumber)
            text = match.group("text")
            if text is None:
                raise ParserException("Text is empty!", linenumber)
            return Text(text, linenumber, current)

        PATTERN = r"(?<text>\[?[^\[{}]+)(?:(?=\[)|$)"
    
    
    class Tag:
        """
        A tag of the form "[name;value;value...]" inside a content.
        Attributes:
        - name: the part before the first semicolon
        - values: list of the remaining semicolon-separated parts
        - linenumber: the line number passed in by the parser
        - fork: the element passed as current when the tag was parsed

        Rendering "todo" and "beschluss" tags reads the attributes todo and
        decision. They are not assigned anywhere in this class, so they have
        to be attached from outside before rendering; only the html output
        falls back to the raw values when they are missing.
        """
        def __init__(self, name, values, linenumber, fork):
            self.name = name
            self.values = values
            self.linenumber = linenumber
            self.fork = fork

        def render(self, render_type, show_private, level=None, protocol=None):
            """
            Renders the tag in the requested output format.
            Arguments:
            - render_type: the RenderType to produce
            - show_private: if False, todo tags render as an empty string
            - level: unused, accepted for interface compatibility
            - protocol: passed to the todo as current_protocol
            Raises:
            - NotImplementedError: for an unknown render type
            """
            if render_type == RenderType.latex:
                return self._render_latex(show_private, protocol)
            elif render_type == RenderType.plaintext:
                return self._render_plaintext(show_private)
            elif render_type == RenderType.wikitext:
                return self._render_wikitext(show_private, protocol)
            elif render_type == RenderType.html:
                return self._render_html(show_private, protocol)
            elif render_type == RenderType.dokuwiki:
                return self._render_dokuwiki(show_private, protocol)
            raise _not_implemented(self, render_type)

        def _render_latex(self, show_private, protocol):
            """Renders the tag as LaTeX."""
            if self.name == "url":
                return r"\url{{{}}}".format(self.values[0])
            if self.name == "todo":
                if not show_private:
                    return ""
                return self.todo.render_latex(current_protocol=protocol)
            if self.name == "beschluss":
                # The content is escaped in both variants; previously the
                # variant without categories emitted it unescaped.
                content = escape_tex(self.decision.content)
                if len(self.decision.categories):
                    return r"\Beschluss[{}]{{{}}}".format(
                        escape_tex(self.decision.get_categories_str()),
                        content)
                return r"\Beschluss{{{}}}".format(content)
            if self.name == "footnote":
                return r"\footnote{{{}}}".format(self.values[0])
            return r"\textbf{{{}:}} {}".format(
                escape_tex(self.name.capitalize()),
                escape_tex(";".join(self.values)))

        def _render_plaintext(self, show_private):
            """Renders the tag as plain text."""
            if self.name == "url":
                return self.values[0]
            if self.name == "todo":
                if not show_private:
                    return ""
                return self.values[0]
            if self.name == "footnote":
                return "[^]({})".format(self.values[0])
            return "{}: {}".format(
                self.name.capitalize(), ";".join(self.values))

        def _render_wikitext(self, show_private, protocol):
            """Renders the tag as wikitext."""
            if self.name == "url":
                return "[{0} {0}]".format(self.values[0])
            if self.name == "todo":
                if not show_private:
                    return ""
                return self.todo.render_wikitext(current_protocol=protocol)
            if self.name == "footnote":
                return "<ref>{}</ref>".format(self.values[0])
            return "'''{}:''' {}".format(
                self.name.capitalize(), ";".join(self.values))

        def _render_html(self, show_private, protocol):
            """
            Renders the tag as HTML. Todo and beschluss tags fall back to
            their raw values when no todo/decision object is attached.
            """
            if self.name == "url":
                return "<a href=\"{0}\">{0}</a>".format(self.values[0])
            if self.name == "todo":
                if not show_private:
                    return ""
                if getattr(self, "todo", None) is not None:
                    return self.todo.render_html(current_protocol=protocol)
                return "<b>Todo:</b> {}".format(";".join(self.values))
            if self.name == "beschluss":
                if getattr(self, "decision", None) is not None:
                    parts = ["<b>Beschluss:</b>", self.decision.content]
                    if len(self.decision.categories) > 0:
                        parts.append("<i>{}</i>".format(
                            self.decision.get_categories_str()))
                    return " ".join(parts)
                return "<b>Beschluss:</b> {}".format(self.values[0])
            if self.name == "footnote":
                return (
                    '<sup id="#fnref{0}"><a href="#fn{0}">Fn</a></sup>'.format(
                        footnote_hash(self.values[0])))
            return "[{}: {}]".format(self.name, ";".join(self.values))

        def _render_dokuwiki(self, show_private, protocol):
            """Renders the tag as DokuWiki markup."""
            if self.name == "url":
                return self.values[0]
            if self.name == "todo":
                if not show_private:
                    return ""
                return self.todo.render_wikitext(
                    current_protocol=protocol, use_dokuwiki=True)
            if self.name == "footnote":
                return "(({}))".format(self.values[0])
            # "beschluss" and every other tag share the generic form.
            return "**{}:** {}".format(
                self.name.capitalize(), ";".join(self.values))

        def dump(self, level=None):
            """
            Returns a debug line for this tag, indented by level.
            A level of None is treated as 0.
            """
            if level is None:
                level = 0
            return "{}tag: {}: {}".format(
                INDENT_LETTER * level, self.name, "; ".join(self.values))

        @staticmethod
        def parse(match, current, linenumber):
            """
            Creates a Tag from the "content" group of a match. The content is
            split at semicolons: the first part is the name, the rest are the
            values.
            Arguments:
            - match: the match of the tag pattern
            - current: the current element, stored as the fork of the tag
            - linenumber: the current line number
            Raises:
            - ParserException: if match is None or its "content" group is None
            """
            if match is None:
                raise ParserException("Tag is not actually a tag!", linenumber)
            content = match.group("content")
            if content is None:
                raise ParserException("Tag is empty!", linenumber)
            parts = content.split(";")
            return Tag(parts[0], parts[1:], linenumber, current)

        PATTERN = r"\[(?<content>[^\]]*)\]"

        # Tag names listed in the error message for invalid content.
        KNOWN_TAGS = ["todo", "url", "beschluss", "footnote", "sitzung"]
    
    
    class Empty(Element):
        """
        Whitespace or a lone semicolon. Parsing it only advances the line
        number; nothing is added to the tree.
        """
        def __init__(self, linenumber):
            # Store the line number on the instance; it used to be assigned
            # to the local name only and was lost.
            self.linenumber = linenumber

        def render(self, render_type, show_private, level=None, protocol=None):
            """Renders to an empty string for every render type."""
            return ""

        def dump(self, level=None):
            """
            Returns a debug line for this element, indented by level.
            A level of None is treated as 0.
            """
            if level is None:
                level = 0
            return "{}empty".format(INDENT_LETTER * level)

        def get_tags(self, tags):
            """Contains no tags; returns the given list unchanged."""
            return tags

        @staticmethod
        def parse(match, current, linenumber=None):
            """
            Consumes a match of the empty pattern.
            Returns:
            - the unchanged current element
            - the line number after the match
            Raises:
            - ParserException: if match is None
            """
            linenumber = Element.parse_inner(match, current, linenumber)
            return current, linenumber

        PATTERN = r"(?:\s+|;)"
    
    
    class Remark(Element):
        """
        A remark line of the form "#name;value".
        Attributes:
        - name: the part before the first semicolon
        - value: everything after the first semicolon
        - linenumber: the line number after the remark was parsed
        """
        def __init__(self, name, value, linenumber):
            self.name = name
            self.value = value
            self.linenumber = linenumber

        def render(self, render_type, show_private, level=None, protocol=None):
            """
            Renders the remark as "name: value" in the markup of render_type.
            Raises:
            - NotImplementedError: for an unknown render type
            """
            if render_type == RenderType.latex:
                return r"\textbf{{{}}}: {}".format(self.name, self.value)
            elif render_type == RenderType.wikitext:
                return "{}: {}".format(self.name, self.value)
            elif render_type == RenderType.plaintext:
                # Used to format the non-existent RenderType.plaintex and
                # therefore always failed.
                return "{}: {}".format(self.name, self.value)
            elif render_type == RenderType.html:
                return "<p>{}: {}</p>".format(self.name, self.value)
            elif render_type == RenderType.dokuwiki:
                return r"{}: {}\\".format(self.name, self.value)
            else:
                raise _not_implemented(self, render_type)

        def dump(self, level=None):
            """
            Returns a debug line for this remark, indented by level.
            A level of None is treated as 0.
            """
            if level is None:
                level = 0
            return "{}remark: {}: {}".format(
                INDENT_LETTER * level, self.name, self.value)

        def get_tags(self, tags):
            """Contains no tags; returns the given list unchanged."""
            return tags

        @staticmethod
        def parse(match, current, linenumber=None):
            """
            Parses a match of the remark pattern and inserts the Remark into
            the current element.
            Returns:
            - the new current element
            - the line number after parsing this element
            Raises:
            - ParserException: if the match is None, has no content group or
              the content contains no semicolon
            """
            linenumber = Element.parse_inner(match, current, linenumber)
            if match.group("content") is None:
                raise ParserException("Remark is missing its content!", linenumber)
            content = match.group("content")
            # Only the first semicolon separates name and value; further
            # semicolons stay part of the value.
            parts = content.split(";", 1)
            if len(parts) < 2:
                raise ParserException("Remark value is empty!", linenumber)
            name, value = parts
            element = Remark(name, value, linenumber)
            current = Element.parse_outer(element, current)
            return current, linenumber

        PATTERN = r"\s*\#(?<content>[^\n]+)"
    
    
    class Fork(Element):
        """
        A section of the document that holds child elements and can be nested.
        In the source a fork is opened by "{", optionally preceded by a name
        or directly followed by "TOP <name>", and closed by "}".
        Attributes:
        - is_top: whether the fork was opened with the TOP marker
        - name: the stripped name; a missing or empty name is stored as None
        - parent: the enclosing fork, None for the root
        - linenumber: the line number after the opening was parsed
        - children: the contained elements in source order
        """
        def __init__(self, is_top, name, parent, linenumber, children=None):
            self.is_top = is_top
            self.name = name.strip() if name else None
            self.parent = parent
            self.linenumber = linenumber
            self.children = [] if children is None else children

        def dump(self, level=None):
            """
            Returns a debug representation: a header line followed by the
            dumps of the children, one level deeper.
            """
            if level is None:
                level = 0
            result_lines = [
                "{}fork: {}'{}'".format(
                    INDENT_LETTER * level,
                    "TOP " if self.is_top else "",
                    self.name)
            ]
            result_lines.extend(
                child.dump(level + 1) for child in self.children)
            return "\n".join(result_lines)

        def test_private(self, name):
            """
            Returns whether name, with colons removed and stripped, is one of
            config.PRIVATE_KEYWORDS. None is never private.
            """
            if name is None:
                return False
            stripped_name = name.replace(":", "").strip()
            return stripped_name in config.PRIVATE_KEYWORDS

        def render(self, render_type, show_private, level=None, protocol=None):
            """
            Renders the fork and its children in the requested output format.
            Arguments:
            - render_type: the RenderType to produce
            - show_private: whether private sections and todos are included
            - level: nesting depth of this fork, None is treated as 0;
                children are rendered with level + 1
            - protocol: passed through to the children
            Returns:
            - the rendered text; an empty string for a hidden section
            Raises:
            - NotImplementedError: for an unknown render type
            """
            if level is None:
                level = 0
            name_line = self.name if self.name is not None else ""
            if level == 0 and self.name == "Todos" and not show_private:
                return ""
            if render_type == RenderType.latex:
                return self._render_latex(
                    name_line, show_private, level, protocol)
            elif (render_type == RenderType.wikitext
                    or render_type == RenderType.dokuwiki):
                content_lines = self._render_wiki(
                    render_type, name_line, show_private, level, protocol)
            elif render_type == RenderType.plaintext:
                content_lines = self._render_plaintext(
                    name_line, show_private, level, protocol)
            elif render_type == RenderType.html:
                content_lines = self._render_html(
                    name_line, show_private, level, protocol)
            else:
                raise _not_implemented(self, render_type)
            # Private sections are dropped only after rendering, as before.
            if self.test_private(self.name) and not show_private:
                return ""
            return content_lines

        def _render_children(self, render_type, show_private, level, protocol):
            """Renders all children one level deeper, dropping blank results."""
            parts = []
            for child in self.children:
                part = child.render(
                    render_type, show_private, level=level + 1,
                    protocol=protocol)
                if len(part.strip()) == 0:
                    continue
                parts.append(part)
            return parts

        def _render_latex(self, name_line, show_private, level, protocol):
            """Renders the fork as a LaTeX itemize list."""
            begin_line = r"\begin{itemize}"
            end_line = r"\end{itemize}"
            content_parts = [
                part if part.startswith(r"\item") else r"\item {}".format(part)
                for part in self._render_children(
                    RenderType.latex, show_private, level, protocol)
            ]
            content_lines = "\n".join(content_parts)
            if len(content_lines.strip()) == 0:
                # An itemize without any item is invalid LaTeX.
                content_lines = "\\item Nichts\n"
            if level == 0:
                return "\n".join([begin_line, content_lines, end_line])
            if self.test_private(self.name):
                if show_private:
                    return "\n".join([
                        r"\begin{tcolorbox}[breakable,title=Interner "
                        r"Abschnitt]",
                        begin_line, content_lines, end_line,
                        r"\end{tcolorbox}"
                    ])
                return (r"\textit{[An dieser Stelle wurde intern "
                        r"protokolliert.]}")
            return "\n".join([
                escape_tex(name_line), begin_line, content_lines, end_line
            ])

        def _render_wiki(self, render_type, name_line, show_private, level,
                         protocol):
            """Renders the fork as a wikitext or DokuWiki heading and body."""
            equal_signs = level + 2
            if render_type == RenderType.dokuwiki:
                equal_signs = 6 - level
            title_line = "{0} {1} {0}".format("=" * equal_signs, name_line)
            content_parts = self._render_children(
                render_type, show_private, level, protocol)
            return "{}\n\n{}\n".format(
                title_line, "\n\n".join(content_parts))

        def _render_plaintext(self, name_line, show_private, level, protocol):
            """Renders the fork as a "#"-prefixed title line and body."""
            title_line = "{} {}".format("#" * (level + 1), name_line)
            content_parts = self._render_children(
                RenderType.plaintext, show_private, level, protocol)
            return "{}\n{}".format(title_line, "\n".join(content_parts))

        def _render_html(self, name_line, show_private, level, protocol):
            """
            Renders the fork as an HTML heading with paragraphs, or as a list
            once the heading depth would reach 5.
            """
            depth = level + 1 + getattr(config, "HTML_LEVEL_OFFSET", 0)
            parts = self._render_children(
                RenderType.html, show_private, level, protocol)
            if depth < 5:
                title_line = "<h{depth}>{content}</h{depth}>".format(
                    depth=depth, content=name_line)
                return "{}\n\n{}".format(
                    title_line,
                    "\n".join("<p>{}</p>".format(part) for part in parts))
            return "{}\n<ul>\n{}\n</ul>".format(
                name_line,
                "\n".join("<li>{}</li>".format(part) for part in parts))

        def get_tags(self, tags=None):
            """
            Collects the tags of all children into tags (a new list if None).
            Returns:
            - the list of tags
            """
            if tags is None:
                tags = []
            for child in self.children:
                child.get_tags(tags)
            return tags

        def is_anonymous(self):
            """Returns whether the fork has no name."""
            return self.name is None

        def is_root(self):
            """Returns whether the fork has no parent."""
            return self.parent is None

        def get_top(self):
            """
            Returns the ancestor (or this fork itself) that is a direct child
            of the root; the root returns itself.
            """
            if self.is_root() or self.parent.is_root():
                return self
            return self.parent.get_top()

        def get_top_number(self):
            """
            Returns the 1-based position of this fork's top among the forks
            that are siblings of that top; 1 for the root.
            """
            if self.is_root():
                return 1
            top = self.get_top()
            tops = [
                child
                for child in top.parent.children
                if isinstance(child, Fork)
            ]
            return tops.index(top) + 1

        def get_maxdepth(self):
            """
            Returns the depth of the deepest chain of nested forks starting
            here; a fork without child forks has depth 1.
            """
            child_depths = [
                child.get_maxdepth()
                for child in self.children
                if isinstance(child, Fork)
            ]
            if len(child_depths) > 0:
                return max(child_depths) + 1
            return 1

        def get_visible_elements(self, show_private, elements=None):
            """
            Collects the children of this fork, the children of Content
            children and, recursively, those of child forks into a set.
            A private fork contributes nothing unless show_private is set.
            Returns:
            - the set of elements (the one passed in, or a new one)
            """
            if elements is None:
                elements = set()
            if show_private or not self.test_private(self.name):
                for child in self.children:
                    elements.add(child)
                    if isinstance(child, Content):
                        elements.update(child.children)
                    elif isinstance(child, Fork):
                        child.get_visible_elements(show_private, elements)
            return elements

        @staticmethod
        def create_root():
            """Creates the nameless, parentless fork a tree starts with."""
            return Fork(None, None, None, 0)

        @staticmethod
        def parse(match, current, linenumber=None):
            """
            Parses the opening of a fork and makes the new fork the current
            element. A TOP name takes precedence over a name before the brace.
            Returns:
            - the new fork as current element
            - the line number after parsing the opening
            Raises:
            - ParserException: if match is None
            """
            linenumber = Element.parse_inner(match, current, linenumber)
            topname = match.group("topname")
            name = match.group("name")
            is_top = topname is not None
            if is_top:
                name = topname
            element = Fork(is_top, name, current, linenumber)
            current = Element.parse_outer(element, current)
            return current, linenumber

        @staticmethod
        def parse_end(match, current, linenumber=None):
            """
            Parses the closing of a fork and returns to its parent.
            Returns:
            - the parent of the current fork as new current element
            - the line number after parsing the closing
            Raises:
            - ParserException: if match is None or the current fork is the
              root, which cannot be closed
            """
            linenumber = Element.parse_inner(match, current, linenumber)
            if current.is_root():
                raise ParserException(
                    "Found end tag for root element!", linenumber)
            current = current.parent
            return current, linenumber

        def append(self, element):
            """Adds element as the last child."""
            self.children.append(element)

        PATTERN = (
            r"\s*(?<name>(?:[^{};\n])+)?\n?\s*{(?:TOP\h*(?<topname>[^;{}\n]+))?")
        END_PATTERN = r"\s*};?"
    
    
    # Top-level syntax: compiled pattern -> parser. parse() tries them in
    # this order and uses the first one matching the start of the source.
    PATTERNS = OrderedDict(
        (re.compile(pattern), parser)
        for pattern, parser in (
            (Fork.PATTERN, Fork.parse),
            (Fork.END_PATTERN, Fork.parse_end),
            (Remark.PATTERN, Remark.parse),
            (Content.PATTERN, Content.parse),
            (Empty.PATTERN, Empty.parse),
        )
    )

    # Syntax inside a content: compiled pattern -> parser, tried in this
    # order by Content.from_content().
    TEXT_PATTERNS = OrderedDict(
        (re.compile(pattern), parser)
        for pattern, parser in (
            (Tag.PATTERN, Tag.parse),
            (Text.PATTERN, Text.parse),
        )
    )
    
    
    def parse(source):
        """
        Parses a protocol source into an element tree.
        The patterns in PATTERNS are tried in order at the start of the
        remaining source; the first match is consumed and handed to its
        parser, until the source is used up.
        Arguments:
        - source: the source text to parse
        Returns:
        - the root fork of the parsed tree
        Raises:
        - ParserException: if a parser fails, no pattern matches or a fork is
          left open at the end; the tree built so far is attached as tree
        """
        tree = Fork.create_root()
        current = tree
        linenumber = 1
        while len(source) > 0:
            for pattern, handler in PATTERNS.items():
                match = pattern.match(source)
                if match is None:
                    continue
                source = source[len(match.group()):]
                try:
                    current, linenumber = handler(match, current, linenumber)
                except ParserException as exc:
                    exc.tree = tree
                    raise exc
                break
            else:
                # Not a single pattern matched the remaining source.
                raise ParserException(
                    "No matching syntax element found!", linenumber, tree=tree)
        if current is not tree:
            # Still inside a fork: its closing brace is missing.
            raise ParserException(
                "Du hast vergessen, Klammern zu schließen! (die öffnende ist in "
                "Zeile {})".format(
                    current.linenumber), linenumber=current.linenumber, tree=tree)
        return tree
    
    
    def main(test_file_name=None):
        """
        Parses test/<test_file_name>.txt and prints the dump of the tree
        followed by "worked!", or the parser error if parsing fails.
        Arguments:
        - test_file_name: name of the test file without directory and
            extension; None or an empty name selects "source0"
        """
        test_file_name = test_file_name or "source0"
        # Read as UTF-8 explicitly so that non-ASCII text does not depend on
        # the locale's default encoding.
        with open("test/{}.txt".format(test_file_name), encoding="utf-8") as f:
            source = f.read()
        try:
            tree = parse(source)
            print(tree.dump())
        except ParserException as e:
            print(e)
        else:
            print("worked!")
    
    
    if __name__ == "__main__":
        # Optional first argument: name of the test file to parse.
        test_file_name = sys.argv[1] if len(sys.argv) > 1 else None
        # sys.exit instead of the exit() helper, which only exists when the
        # site module has been loaded.
        sys.exit(main(test_file_name))