import regex as re
import sys
from collections import OrderedDict
from enum import Enum

from shared import escape_tex
from utils import footnote_hash

import config

# Character repeated once per nesting level in the ``dump()`` output.
INDENT_LETTER = "-"


class ParserException(Exception):
    """Error raised while parsing protocol source.

    Attributes:
    - message: human readable description of the problem
    - linenumber: line the problem was detected at, or None if unknown
    - tree: the (partial) syntax tree built so far, or None

    Subclasses that set ``has_explanation`` to True must also provide an
    ``explanation`` attribute; it is appended to the string form.
    """
    name = "Parser Exception"
    has_explanation = False

    def __init__(self, message, linenumber=None, tree=None):
        self.message = message
        self.linenumber = linenumber
        self.tree = tree

    def __str__(self):
        if self.linenumber is not None:
            result = "Exception at line {}: {}".format(
                self.linenumber, self.message)
        else:
            result = "Exception: {}".format(self.message)
        if self.has_explanation:
            result += "\n" + self.explanation
        return result


class RenderType(Enum):
    """Output formats the syntax tree can be rendered to."""
    latex = 0
    wikitext = 1
    plaintext = 2
    html = 3
    dokuwiki = 4


def _not_implemented(self, render_type):
    """
    Build (not raise) the error for an unsupported render type.

    Arguments:
    - self: the element that was asked to render
    - render_type: the unsupported RenderType member

    Returns:
    - a NotImplementedError instance; the caller is expected to raise it
    """
    return NotImplementedError(
        "The rendertype {} has not been implemented for {}.".format(
            render_type.name, self.__class__.__name__))


class Element:
    """
    Generic (abstract) base element.
    Should never really exist.
    Template for what an element class should contain.
    """

    def render(self, render_type, show_private, level=None, protocol=None):
        """
        Renders the element to the requested output format.

        Arguments:
        - render_type: the RenderType to produce
        - show_private: whether private content may be included
        - level: the nesting depth of this element
        - protocol: passed through to the elements that need it

        Returns:
        - a string representation of the element
        """
        return "Generic Base Syntax Element, this is not supposed to appear."

    def dump(self, level=None):
        """Return a one-line debug representation indented by level."""
        if level is None:
            level = 0
        return "{}element".format(INDENT_LETTER * level)

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parses a match of this elements pattern.

        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. Should be a fork.
          May be modified.
        - linenumber: the current line number, for error messages

        Returns:
        - the new current element
        - the line number after parsing this element

        The base implementation always raises a ParserException.
        """
        raise ParserException(
            "Trying to parse the generic base element!", linenumber)

    @staticmethod
    def parse_inner(match, current, linenumber=None):
        """
        Do the parsing for every element. Checks if the match exists.

        Arguments:
        - match: the match of this elements pattern
        - current: the current element of the document. Should be a fork.
        - linenumber: the current line number, for error messages

        Returns:
        - new line number: the old one (0 if None) plus the number of
          newlines consumed by the match

        Raises:
        - ParserException if match is None
        """
        if match is None:
            raise ParserException("Source does not match!", linenumber)
        length = match.group().count("\n")
        return length + (0 if linenumber is None else linenumber)

    @staticmethod
    def parse_outer(element, current):
        """
        Handle the insertion of the object into the tree.

        Arguments:
        - element: the new parsed element to insert
        - current: the current element of the parsed document

        Returns:
        - the new current element: the element itself if it is a Fork
          (subsequent elements become its children), otherwise current
        """
        current.append(element)
        if isinstance(element, Fork):
            return element
        # Non-fork elements remember the fork they were inserted into.
        element.fork = current
        return current

    # "x" not preceded by "x" directly after consuming "x": never matches.
    PATTERN = r"x(?<!x)"


class Content(Element):
    """A line of content consisting of Text and Tag children."""

    def __init__(self, children, linenumber):
        self.children = children
        self.linenumber = linenumber

    def render(self, render_type, show_private, level=None, protocol=None):
        """Concatenate the rendered children without any separator."""
        return "".join(
            child.render(
                render_type, show_private, level=level, protocol=protocol)
            for child in self.children)

    def dump(self, level=None):
        """Return a debug representation with children one level deeper."""
        if level is None:
            level = 0
        result_lines = ["{}content:".format(INDENT_LETTER * level)]
        for child in self.children:
            result_lines.append(child.dump(level + 1))
        return "\n".join(result_lines)

    def get_tags(self, tags):
        """Append all Tag children to the list tags and return it."""
        tags.extend(
            child for child in self.children if isinstance(child, Tag))
        return tags

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parse a content line and insert it into the tree.

        Arguments:
        - match: a match of Content.PATTERN
        - current: the current fork
        - linenumber: the current line number, for error messages

        Returns:
        - the new current element
        - the line number after parsing this element

        Raises:
        - ParserException if the match is missing, has no content group
          or the content cannot be split into texts and tags
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        if match.group("content") is None:
            raise ParserException(
                "Content is missing its content!", linenumber)
        content = match.group("content")
        element = Content.from_content(content, current, linenumber)
        if len(content) == 0:
            return current, linenumber
        current = Element.parse_outer(element, current)
        return current, linenumber

    @staticmethod
    def from_content(content, current, linenumber):
        """
        Split a content string into Text and Tag children.

        Arguments:
        - content: the raw content string
        - current: the fork the children will belong to
        - linenumber: the current line number, for error messages

        Returns:
        - a new Content holding the parsed children

        Raises:
        - ParserException if no text pattern matches the remaining input
        """
        children = []
        while len(content) > 0:
            matched = False
            # TEXT_PATTERNS is ordered; the first matching pattern wins.
            for pattern in TEXT_PATTERNS:
                match = pattern.match(content)
                if match is not None:
                    matched = True
                    children.append(TEXT_PATTERNS[pattern](
                        match, current, linenumber))
                    content = content[len(match.group()):]
                    break
            if not matched:
                raise ParserException(
                    "Dies ist kein valider Tag! "
                    "(mögliche Tags sind: {})".format(
                        ", ".join(Tag.KNOWN_TAGS)),
                    linenumber)
        return Content(children, linenumber)

    PATTERN = (
        r"\s*(?<content>(?:(?:[^\[\];\r\n{}]+)|(?:[^\[\];\r\n{}]+)?"
        r"(?:\[[^\]\r\n{}]+\][^;\[\]\r\n{}]*)+));?")


class Text:
    """A run of plain text inside a Content element."""

    def __init__(self, text, linenumber, fork):
        self.text = text
        self.linenumber = linenumber
        self.fork = fork

    def render(self, render_type, show_private, level=None, protocol=None):
        """
        Return the text, TeX-escaped for latex and unchanged otherwise.

        Raises:
        - NotImplementedError for an unknown render type
        """
        if render_type == RenderType.latex:
            return escape_tex(self.text)
        if render_type in (
                RenderType.wikitext, RenderType.plaintext,
                RenderType.html, RenderType.dokuwiki):
            return self.text
        raise _not_implemented(self, render_type)

    def dump(self, level=None):
        """Return a one-line debug representation indented by level."""
        if level is None:
            level = 0
        return "{}text: {}".format(INDENT_LETTER * level, self.text)

    @staticmethod
    def parse(match, current, linenumber):
        """
        Build a Text from a match of Text.PATTERN.

        Returns:
        - the new Text (it is not inserted into the tree here)

        Raises:
        - ParserException if the match or its text group is missing
        """
        if match is None:
            raise ParserException("Text is not actually a text!", linenumber)
        content = match.group("text")
        if content is None:
            raise ParserException("Text is empty!", linenumber)
        return Text(content, linenumber, current)

    PATTERN = r"(?<text>\[?[^\[{}]+)(?:(?=\[)|$)"


class Tag:
    """
    A bracketed tag like ``[name;value;...]`` inside a Content element.

    Rendering of "todo" and "beschluss" tags reads the attributes ``todo``
    and ``decision``, which are not set anywhere in this module --
    presumably they are attached by the caller before rendering.
    """

    def __init__(self, name, values, linenumber, fork):
        self.name = name
        self.values = values
        self.linenumber = linenumber
        self.fork = fork

    def render(self, render_type, show_private, level=None, protocol=None):
        """
        Render the tag depending on its name and the render type.

        "todo" tags render to an empty string unless show_private is set.

        Raises:
        - NotImplementedError for an unknown render type
        """
        if render_type == RenderType.latex:
            if self.name == "url":
                return r"\url{{{}}}".format(self.values[0])
            elif self.name == "todo":
                if not show_private:
                    return ""
                return self.todo.render_latex(current_protocol=protocol)
            elif self.name == "beschluss":
                if len(self.decision.categories):
                    return r"\Beschluss[{}]{{{}}}".format(
                        escape_tex(self.decision.get_categories_str()),
                        escape_tex(self.decision.content))
                else:
                    # Escape the content here as well, exactly like the
                    # branch with categories does.
                    return r"\Beschluss{{{}}}".format(
                        escape_tex(self.decision.content))
            elif self.name == "footnote":
                return r"\footnote{{{}}}".format(self.values[0])
            return r"\textbf{{{}:}} {}".format(
                escape_tex(self.name.capitalize()),
                escape_tex(";".join(self.values)))
        elif render_type == RenderType.plaintext:
            if self.name == "url":
                return self.values[0]
            elif self.name == "todo":
                if not show_private:
                    return ""
                return self.values[0]
            elif self.name == "footnote":
                return "[^]({})".format(self.values[0])
            return "{}: {}".format(
                self.name.capitalize(), ";".join(self.values))
        elif render_type == RenderType.wikitext:
            if self.name == "url":
                return "[{0} {0}]".format(self.values[0])
            elif self.name == "todo":
                if not show_private:
                    return ""
                return self.todo.render_wikitext(current_protocol=protocol)
            elif self.name == "footnote":
                return "<ref>{}</ref>".format(self.values[0])
            return "'''{}:''' {}".format(
                self.name.capitalize(), ";".join(self.values))
        elif render_type == RenderType.html:
            if self.name == "url":
                return "<a href=\"{0}\">{0}</a>".format(self.values[0])
            elif self.name == "todo":
                if not show_private:
                    return ""
                if getattr(self, "todo", None) is not None:
                    return self.todo.render_html(current_protocol=protocol)
                else:
                    return "<b>Todo:</b> {}".format(";".join(self.values))
            elif self.name == "beschluss":
                if getattr(self, "decision", None) is not None:
                    parts = ["<b>Beschluss:</b>", self.decision.content]
                    if len(self.decision.categories) > 0:
                        parts.append("<i>{}</i>".format(
                            self.decision.get_categories_str()))
                    return " ".join(parts)
                else:
                    return "<b>Beschluss:</b> {}".format(self.values[0])
            elif self.name == "footnote":
                return (
                    '<sup id="#fnref{0}"><a href="#fn{0}">Fn</a></sup>'.format(
                        footnote_hash(self.values[0])))
            return "[{}: {}]".format(self.name, ";".join(self.values))
        elif render_type == RenderType.dokuwiki:
            if self.name == "url":
                return self.values[0]
            elif self.name == "todo":
                if not show_private:
                    return ""
                return self.todo.render_wikitext(
                    current_protocol=protocol, use_dokuwiki=True)
            elif self.name == "beschluss":
                return "**{}:** {}".format(
                    self.name.capitalize(), ";".join(self.values))
            elif self.name == "footnote":
                return "(({}))".format(self.values[0])
            else:
                return "**{}:** {}".format(
                    self.name.capitalize(), ";".join(self.values))
        else:
            raise _not_implemented(self, render_type)

    def dump(self, level=None):
        """Return a one-line debug representation indented by level."""
        if level is None:
            level = 0
        return "{}tag: {}: {}".format(
            INDENT_LETTER * level, self.name, "; ".join(self.values))

    @staticmethod
    def parse(match, current, linenumber):
        """
        Build a Tag from a match of Tag.PATTERN.

        The bracket content is split at ";": the first part becomes the
        name, the remaining parts the values.

        Returns:
        - the new Tag (it is not inserted into the tree here)

        Raises:
        - ParserException if the match or its content group is missing
        """
        if match is None:
            raise ParserException("Tag is not actually a tag!", linenumber)
        content = match.group("content")
        if content is None:
            raise ParserException("Tag is empty!", linenumber)
        parts = content.split(";")
        return Tag(parts[0], parts[1:], linenumber, current)

    PATTERN = r"\[(?<content>[^\]]*)\]"

    KNOWN_TAGS = ["todo", "url", "beschluss", "footnote", "sitzung"]


class Empty(Element):
    """Whitespace or a stray semicolon; consumed without a tree node."""

    def __init__(self, linenumber):
        # Store on the instance; a bare ``linenumber = linenumber`` would
        # only rebind the local and leave the object without the attribute.
        self.linenumber = linenumber

    def render(self, render_type, show_private, level=None, protocol=None):
        """Render to an empty string for every render type."""
        return ""

    def dump(self, level=None):
        """Return a one-line debug representation indented by level."""
        if level is None:
            level = 0
        return "{}empty".format(INDENT_LETTER * level)

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Consume the match and advance the line number only.

        Returns:
        - the unchanged current element
        - the line number after the consumed text
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        return current, linenumber

    PATTERN = r"(?:\s+|;)"


class Remark(Element):
    """A ``#name;value`` line."""

    def __init__(self, name, value, linenumber):
        self.name = name
        self.value = value
        self.linenumber = linenumber

    def render(self, render_type, show_private, level=None, protocol=None):
        """
        Render the remark as "name: value" in the requested format.

        Raises:
        - NotImplementedError for an unknown render type
        """
        if render_type == RenderType.latex:
            return r"\textbf{{{}}}: {}".format(self.name, self.value)
        elif render_type == RenderType.wikitext:
            return "{}: {}".format(self.name, self.value)
        elif render_type == RenderType.plaintext:
            return "{}: {}".format(self.name, self.value)
        elif render_type == RenderType.html:
            return "<p>{}: {}</p>".format(self.name, self.value)
        elif render_type == RenderType.dokuwiki:
            return r"{}: {}\\".format(self.name, self.value)
        else:
            raise _not_implemented(self, render_type)

    def dump(self, level=None):
        """Return a one-line debug representation indented by level."""
        if level is None:
            level = 0
        return "{}remark: {}: {}".format(
            INDENT_LETTER * level, self.name, self.value)

    def get_tags(self, tags):
        """A remark holds no tags; return the list unchanged."""
        return tags

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parse a remark line and insert it into the tree.

        The content is split at the first ";" into name and value.

        Returns:
        - the new current element
        - the line number after parsing this element

        Raises:
        - ParserException if the match is missing, has no content group
          or the content contains no ";"
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        if match.group("content") is None:
            raise ParserException("Remark is missing its content!", linenumber)
        content = match.group("content")
        parts = content.split(";", 1)
        if len(parts) < 2:
            raise ParserException("Remark value is empty!", linenumber)
        name, value = parts
        element = Remark(name, value, linenumber)
        current = Element.parse_outer(element, current)
        return current, linenumber

    PATTERN = r"\s*\#(?<content>[^\n]+)"


class Fork(Element):
    """
    A braced section of the document that contains other elements.

    Attributes:
    - is_top: whether the section was opened with the TOP keyword
    - name: the stripped section name, or None for an unnamed section
    - parent: the enclosing Fork, or None for the root
    - linenumber: the line number the section was parsed at
    - children: the contained elements in source order
    """

    def __init__(self, is_top, name, parent, linenumber, children=None):
        self.is_top = is_top
        self.name = name.strip() if name else None
        self.parent = parent
        self.linenumber = linenumber
        self.children = [] if children is None else children

    def dump(self, level=None):
        """Return a debug representation with children one level deeper."""
        if level is None:
            level = 0
        result_lines = [
            "{}fork: {}'{}'".format(
                INDENT_LETTER * level,
                "TOP " if self.is_top else "",
                self.name)
        ]
        for child in self.children:
            result_lines.append(child.dump(level + 1))
        return "\n".join(result_lines)

    def test_private(self, name):
        """
        Check whether name (colons removed, stripped) is one of
        config.PRIVATE_KEYWORDS. None is never private.
        """
        if name is None:
            return False
        stripped_name = name.replace(":", "").strip()
        return stripped_name in config.PRIVATE_KEYWORDS

    def _render_children(self, render_type, show_private, level, protocol):
        """Render all children one level deeper, dropping blank results."""
        parts = []
        for child in self.children:
            part = child.render(
                render_type, show_private,
                level=level + 1, protocol=protocol)
            if len(part.strip()) == 0:
                continue
            parts.append(part)
        return parts

    def render(self, render_type, show_private, level, protocol=None):
        """
        Render the section and all its children.

        Arguments:
        - render_type: the RenderType to produce
        - show_private: whether private sections may be included
        - level: the nesting depth of this fork (0 for a top level fork)
        - protocol: passed through to the children

        Returns:
        - the rendered section; an empty string for a hidden "Todos"
          section at level 0 and for hidden private sections (latex
          emits a placeholder text for the latter instead)

        Raises:
        - NotImplementedError for an unknown render type
        """
        name_line = self.name if self.name is not None else ""
        if level == 0 and self.name == "Todos" and not show_private:
            return ""
        if render_type == RenderType.latex:
            begin_line = r"\begin{itemize}"
            end_line = r"\end{itemize}"
            # Every child has to be an item of the surrounding itemize.
            content_parts = [
                part if part.startswith(r"\item")
                else r"\item {}".format(part)
                for part in self._render_children(
                    render_type, show_private, level, protocol)
            ]
            content_lines = "\n".join(content_parts)
            if len(content_lines.strip()) == 0:
                # An itemize without any item does not compile.
                content_lines = "\\item Nichts\n"
            if level == 0:
                return "\n".join([begin_line, content_lines, end_line])
            elif self.test_private(self.name):
                if show_private:
                    return (r"\begin{tcolorbox}[breakable,title=Interner "
                            r"Abschnitt]" + "\n"
                            + r"\begin{itemize}" + "\n"
                            + content_lines + "\n"
                            + r"\end{itemize}" + "\n"
                            + r"\end{tcolorbox}")
                else:
                    return (r"\textit{[An dieser Stelle wurde intern "
                            r"protokolliert.]}")
            else:
                return "\n".join([
                    escape_tex(name_line), begin_line,
                    content_lines, end_line
                ])
        elif (render_type == RenderType.wikitext
                or render_type == RenderType.dokuwiki):
            equal_signs = level + 2
            if render_type == RenderType.dokuwiki:
                equal_signs = 6 - level
            title_line = "{0} {1} {0}".format("=" * equal_signs, name_line)
            content_parts = self._render_children(
                render_type, show_private, level, protocol)
            content_lines = "{}\n\n{}\n".format(
                title_line, "\n\n".join(content_parts))
            if self.test_private(self.name) and not show_private:
                return ""
            else:
                return content_lines
        elif render_type == RenderType.plaintext:
            title_line = "{} {}".format("#" * (level + 1), name_line)
            content_parts = self._render_children(
                render_type, show_private, level, protocol)
            content_lines = "{}\n{}".format(
                title_line, "\n".join(content_parts))
            if self.test_private(self.name) and not show_private:
                return ""
            else:
                return content_lines
        elif render_type == RenderType.html:
            depth = level + 1 + getattr(config, "HTML_LEVEL_OFFSET", 0)
            rendered = self._render_children(
                render_type, show_private, level, protocol)
            if depth < 5:
                # Headings down to h4, children as paragraphs.
                title_line = "<h{depth}>{content}</h{depth}>".format(
                    depth=depth, content=name_line)
                content_parts = [
                    "<p>{}</p>".format(part) for part in rendered]
                content_lines = "{}\n\n{}".format(
                    title_line, "\n".join(content_parts))
            else:
                # Deeper levels become a plain name with a bullet list.
                content_parts = [
                    "<li>{}</li>".format(part) for part in rendered]
                content_lines = "{}\n<ul>\n{}\n</ul>".format(
                    name_line, "\n".join(content_parts))
            if self.test_private(self.name) and not show_private:
                return ""
            else:
                return content_lines
        else:
            raise _not_implemented(self, render_type)

    def get_tags(self, tags=None):
        """Collect the tags of all children into tags (new list if None)."""
        if tags is None:
            tags = []
        for child in self.children:
            child.get_tags(tags)
        return tags

    def is_anonymous(self):
        """Return whether the fork has no name."""
        return self.name is None

    def is_root(self):
        """Return whether the fork has no parent."""
        return self.parent is None

    def get_top(self):
        """Return the ancestor (or self) directly below the root.

        For the root itself the root is returned.
        """
        if self.is_root() or self.parent.is_root():
            return self
        return self.parent.get_top()

    def get_top_number(self):
        """Return the 1-based position of this fork's top level ancestor
        among the Fork children of its parent; 1 for the root."""
        if self.is_root():
            return 1
        top = self.get_top()
        tops = [
            child
            for child in top.parent.children
            if isinstance(child, Fork)
        ]
        return tops.index(top) + 1

    def get_maxdepth(self):
        """Return the depth of the deepest fork nesting, counting self."""
        child_depths = [
            child.get_maxdepth()
            for child in self.children
            if isinstance(child, Fork)
        ]
        if len(child_depths) > 0:
            return max(child_depths) + 1
        else:
            return 1

    def get_visible_elements(self, show_private, elements=None):
        """
        Collect all elements below this fork that are visible.

        Children of a private fork are skipped unless show_private is
        set. The children of Content elements are included as well.

        Returns:
        - the set elements (a new set if None was given)
        """
        if elements is None:
            elements = set()
        if show_private or not self.test_private(self.name):
            for child in self.children:
                elements.add(child)
                if isinstance(child, Content):
                    elements.update(child.children)
                elif isinstance(child, Fork):
                    child.get_visible_elements(show_private, elements)
        return elements

    @staticmethod
    def create_root():
        """Create the unnamed, parentless root fork at line 0."""
        return Fork(None, None, None, 0)

    @staticmethod
    def parse(match, current, linenumber=None):
        """
        Parse the opening of a section and descend into it.

        If the "topname" group matched, the fork is marked as TOP and
        uses that name; otherwise the "name" group is used.

        Returns:
        - the new fork as the new current element
        - the line number after parsing this element
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        topname = match.group("topname")
        name = match.group("name")
        is_top = False
        if topname is not None:
            is_top = True
            name = topname
        element = Fork(is_top, name, current, linenumber)
        current = Element.parse_outer(element, current)
        return current, linenumber

    @staticmethod
    def parse_end(match, current, linenumber=None):
        """
        Parse the closing brace of a section and ascend to the parent.

        Returns:
        - the parent of current as the new current element
        - the line number after parsing this element

        Raises:
        - ParserException if current is the root
        """
        linenumber = Element.parse_inner(match, current, linenumber)
        if current.is_root():
            raise ParserException(
                "Found end tag for root element!", linenumber)
        current = current.parent
        return current, linenumber

    def append(self, element):
        """Add element as the last child."""
        self.children.append(element)

    PATTERN = (
        r"\s*(?<name>(?:[^{};\n])+)?\n?\s*{(?:TOP\h*(?<topname>[^;{}\n]+))?")
    END_PATTERN = r"\s*};?"


# Top level syntax; the order matters because the first match wins.
PATTERNS = OrderedDict([
    (re.compile(Fork.PATTERN), Fork.parse),
    (re.compile(Fork.END_PATTERN), Fork.parse_end),
    (re.compile(Remark.PATTERN), Remark.parse),
    (re.compile(Content.PATTERN), Content.parse),
    (re.compile(Empty.PATTERN), Empty.parse)
])

# Syntax inside a content line; tags are tried before plain text.
TEXT_PATTERNS = OrderedDict([
    (re.compile(Tag.PATTERN), Tag.parse),
    (re.compile(Text.PATTERN), Text.parse)
])


def parse(source):
    """
    Parse protocol source into a syntax tree.

    Arguments:
    - source: the complete source text

    Returns:
    - the root Fork of the parsed tree

    Raises:
    - ParserException if no pattern matches the remaining source, an
      element fails to parse or a section is left open at the end. The
      exception carries the partial tree in its tree attribute.
    """
    linenumber = 1
    tree = Fork.create_root()
    current = tree
    while len(source) > 0:
        found = False
        for pattern in PATTERNS:
            match = pattern.match(source)
            if match is not None:
                source = source[len(match.group()):]
                try:
                    current, linenumber = PATTERNS[pattern](
                        match, current, linenumber)
                except ParserException as exc:
                    # Hand the partial tree to the caller for diagnostics.
                    exc.tree = tree
                    raise
                found = True
                break
        if not found:
            raise ParserException(
                "No matching syntax element found!", linenumber, tree=tree)
    if current is not tree:
        raise ParserException(
            "Du hast vergessen, Klammern zu schließen! (die öffnende ist in "
            "Zeile {})".format(
                current.linenumber),
            linenumber=current.linenumber, tree=tree)
    return tree


def main(test_file_name=None):
    """
    Parse ``test/<test_file_name>.txt`` and print the dumped tree.

    Arguments:
    - test_file_name: file name without extension, "source0" if empty

    Prints the ParserException instead if parsing fails, and "worked!"
    after a successful run. Returns None.
    """
    test_file_name = test_file_name or "source0"
    with open("test/{}.txt".format(test_file_name)) as f:
        source = f.read()
    try:
        tree = parse(source)
        print(tree.dump())
    except ParserException as e:
        print(e)
    else:
        print("worked!")


if __name__ == "__main__":
    test_file_name = sys.argv[1] if len(sys.argv) > 1 else None
    # sys.exit instead of the site-provided exit(), which is meant for
    # interactive use and is missing when Python runs with -S.
    sys.exit(main(test_file_name))