Add document sync

c630d5c9 · Robin Sonnabend · 2a79ac08 · c630d5c9 · c630d5c9
Commit c630d5c9 authored 6 years ago by Robin Sonnabend
--- a/.gitignore
+++ b/.gitignore
+__pycache__/
+documents/
--- a/moodle_sync.py
+++ b/moodle_sync.py
+#!/usr/bin/env python3
+
+import urllib
+import os
+from datetime import datetime, timezone
+
+import requests
+import bs4
+
+from shib_client import authenticate
+
+
+# URL handed to authenticate() in main() to obtain the session.
+AUTH_URL = "https://moodle.rwth-aachen.de/auth/shibboleth/index.php"
+# Overview page that query_courses() fetches and scans for course links.
+INDEX_URL = "https://moodle.rwth-aachen.de/my/"
+
+
+def query_courses(session):
+    content = session.get(INDEX_URL).text
+    soup = bs4.BeautifulSoup(content, "html.parser")
+    course_list = soup.find("div", id="coc-courselist")
+    courses = course_list.find_all("div", class_="coc-course")
+    course_anchors = [
+        course.find("a", href=True)
+        for course in courses
+    ]
+    course_links = {
+        anchor["title"]: anchor["href"]
+        for anchor in course_anchors
+    }
+    return course_links
+
+
+def escape_path(path_part):
+    return path_part.replace(os.path.sep, "".join(("\\", os.path.sep)))
+
+
+def query_documents(session, link):
+    content = session.get(link).text
+    soup = bs4.BeautifulSoup(content, "html.parser")
+    for folder in soup.find_all("li", class_="folder"):
+        anchor = folder.find("a", href=True)
+        title = "".join(
+            text
+            for text in anchor.find("span", class_="instancename").contents
+            if isinstance(text, str))
+        title = escape_path(title)
+        print("folder", title)
+        for name, href in query_documents(session, anchor["href"]):
+            yield os.path.join(title, name), href
+
+    for span in soup.find_all("span", class_="fp-filename-icon"):
+        href = span.find("a")["href"]
+        name = span.find("span", class_="fp-filename").text
+        name = escape_path(name)
+        print("document", name)
+        yield name, href
+
+    for element in soup.find_all("li", class_="resource"):
+        div = element.find("div", class_="activityinstance")
+        href = div.find("a", href=True)["href"]
+        title = "".join(
+            text
+            for text in div.find("span", class_="instancename").contents
+            if isinstance(text, str))
+        title = escape_path(title)
+        print("resource", title)
+        yield title, href
+
+
+
+def sync_file(session, filename, url, directory):
+    path = os.path.join(directory, filename)
+    headers = {}
+    import locale
+    locale.setlocale(locale.LC_TIME, "C")
+    try:
+        last_modified = datetime.fromtimestamp(
+            os.path.getmtime(path),
+            datetime.now(timezone.utc).astimezone().tzinfo)
+        last_modified = last_modified - last_modified.utcoffset()
+    except FileNotFoundError:
+        pass
+    else:
+        print(last_modified)
+        headers["If-Modified-Since"] = last_modified.strftime(
+            "%a, %d %b %Y %H:%M:%S GMT")
+    result = session.get(url, headers=headers)
+    if result.status_code == 200:
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        with open(path, "wb") as file:
+            file.write(result.content)
+
+    
+    
+
+def main():
+    import argparse
+    parser = argparse.ArgumentParser("RWTH Moodle Sync")
+    parser.add_argument("--username")
+    parser.add_argument("--password")
+    parser.add_argument("--directory", default="documents")
+
+    arguments = parser.parse_args()
+
+    username = arguments.username
+    if username is None:
+        username = input("Username: ")
+
+    password = arguments.password
+    if password is None:
+        import getpass
+        password = getpass.getpass("Password: ")
+
+    session = authenticate(AUTH_URL, username, password)
+
+    course_links = query_courses(session)
+    for course_name, course_link in course_links.items():
+        print(course_name)
+        for path, link in query_documents(session, course_link):
+            print(path, link)
+            sync_file(
+                session,
+                os.path.join(course_name, path),
+                link, arguments.directory)
+
+
+
+# Start the sync only when this file is executed as a script.
+if __name__ == "__main__":
+    main()