#!/usr/bin/env python3
"""Mirror course documents from RWTH Moodle into a local directory.

The script logs in through ``shib_client.authenticate``, reads the course
list from ``INDEX_URL``, walks every course page for folders, folder files
and resources, and downloads each document that is missing locally or that
the server reports as modified since the local copy was written.
"""

# Companion .gitignore entries shipped together with this script:
#   __pycache__/
#   documents/      (the default value of --directory below)

import argparse
import email.utils
import getpass
import os
import urllib
from datetime import datetime, timezone

import requests
import bs4

from shib_client import authenticate


AUTH_URL = "https://moodle.rwth-aachen.de/auth/shibboleth/index.php"
INDEX_URL = "https://moodle.rwth-aachen.de/my/"


def query_courses(session):
    """Return a mapping of course title to course URL.

    Args:
        session: Object with a ``get(url)`` method whose result has a
            ``text`` attribute (presumably the ``requests`` session returned
            by ``authenticate`` -- confirm against ``shib_client``).

    Returns:
        dict mapping the ``title`` attribute of each course anchor to its
        ``href``.

    Raises:
        RuntimeError: if the page has no ``div#coc-courselist`` element,
            which previously surfaced as an opaque ``AttributeError``.
    """
    content = session.get(INDEX_URL).text
    soup = bs4.BeautifulSoup(content, "html.parser")
    course_list = soup.find("div", id="coc-courselist")
    if course_list is None:
        raise RuntimeError(
            "course list not found on {} (login failed or page layout "
            "changed?)".format(INDEX_URL))

    course_links = {}
    for course in course_list.find_all("div", class_="coc-course"):
        anchor = course.find("a", href=True)
        # Skip entries that carry no usable link or title instead of
        # crashing on them.
        if anchor is None or not anchor.get("title"):
            continue
        course_links[anchor["title"]] = anchor["href"]
    return course_links


def escape_path(path_part):
    """Turn a remote title into a single, safe local path component.

    Path separators are replaced by ``_``. Prefixing them with a backslash
    (the previous behaviour) does not help: the separator is still part of
    the name, so ``os.path.join`` would still create an extra directory
    level. The special names ``""``, ``"."`` and ``".."`` are mapped to
    ``"_"`` so a remote title can never refer to the current or parent
    directory.

    Args:
        path_part: Name of one file or directory as shown by the server.

    Returns:
        The name with every ``os.path.sep`` / ``os.path.altsep`` replaced.
    """
    for separator in (os.path.sep, os.path.altsep):
        # os.path.altsep is None on platforms with a single separator.
        if separator:
            path_part = path_part.replace(separator, "_")
    if path_part in ("", ".", ".."):
        return "_"
    return path_part


def _instance_name(element):
    """Return the direct text of the ``span.instancename`` inside *element*.

    Only string children are joined; nested tags inside the span are left
    out. Returns ``None`` when *element* contains no such span.
    """
    span = element.find("span", class_="instancename")
    if span is None:
        return None
    return "".join(text for text in span.contents if isinstance(text, str))


def query_documents(session, link):
    """Yield ``(relative_path, url)`` for every document reachable from *link*.

    Three kinds of entries are recognised on the page:

    * ``li.folder``: the linked page is scanned recursively and its
      results are prefixed with the folder title,
    * ``span.fp-filename-icon``: a file entry, yielded under its file name,
    * ``li.resource``: a resource, yielded under its instance name.

    Entries that lack the expected link or name markup are skipped.

    Args:
        session: Object with a ``get(url)`` method (see ``query_courses``).
        link: URL of the course or folder page to scan.

    Yields:
        Tuples of the escaped relative path and the download URL.
    """
    content = session.get(link).text
    soup = bs4.BeautifulSoup(content, "html.parser")

    for folder in soup.find_all("li", class_="folder"):
        anchor = folder.find("a", href=True)
        title = _instance_name(anchor) if anchor is not None else None
        if title is None:
            continue
        title = escape_path(title)
        print("folder", title)
        for name, href in query_documents(session, anchor["href"]):
            yield os.path.join(title, name), href

    for span in soup.find_all("span", class_="fp-filename-icon"):
        anchor = span.find("a", href=True)
        name_span = span.find("span", class_="fp-filename")
        if anchor is None or name_span is None:
            continue
        name = escape_path(name_span.text)
        print("document", name)
        yield name, anchor["href"]

    for element in soup.find_all("li", class_="resource"):
        div = element.find("div", class_="activityinstance")
        anchor = div.find("a", href=True) if div is not None else None
        title = _instance_name(div) if anchor is not None else None
        if title is None:
            continue
        title = escape_path(title)
        print("resource", title)
        yield title, anchor["href"]


def sync_file(session, filename, url, directory):
    """Download *url* to ``directory/filename`` unless the copy is current.

    When the target already exists, its modification time is sent as an
    ``If-Modified-Since`` header so the server can answer 304 and the
    download is skipped.

    Args:
        session: Object with a ``get(url, headers=...)`` method whose result
            exposes ``status_code`` and ``content``.
        filename: Path of the target relative to *directory*.
        url: Download URL.
        directory: Base directory of the local mirror.
    """
    path = os.path.join(directory, filename)
    headers = {}
    try:
        modified_timestamp = os.path.getmtime(path)
    except FileNotFoundError:
        # No local copy yet: request unconditionally.
        pass
    else:
        last_modified = datetime.fromtimestamp(modified_timestamp, timezone.utc)
        # format_datetime always emits English day/month names, so there is
        # no need to change the process-wide locale for strftime.
        headers["If-Modified-Since"] = email.utils.format_datetime(
            last_modified, usegmt=True)

    result = session.get(url, headers=headers)
    if result.status_code == 304:
        return
    if result.status_code != 200:
        # Keep the sync best-effort, but do not hide failed downloads.
        print("skipping", path, "- unexpected HTTP status", result.status_code)
        return

    parent = os.path.dirname(path)
    if parent:
        # os.makedirs("") raises, so only create a non-empty parent.
        os.makedirs(parent, exist_ok=True)

    # Write to a temporary name first: an interrupted write must not leave a
    # truncated file whose fresh mtime makes later runs receive 304 forever.
    partial_path = path + ".part"
    with open(partial_path, "wb") as file:
        file.write(result.content)
    os.replace(partial_path, path)


def main():
    """Parse the command line, log in and mirror all courses."""
    # The title is the description; passed positionally it would become
    # ``prog`` and garble the usage line.
    parser = argparse.ArgumentParser(description="RWTH Moodle Sync")
    parser.add_argument("--username")
    parser.add_argument("--password")
    parser.add_argument("--directory", default="documents")

    arguments = parser.parse_args()

    username = arguments.username
    if username is None:
        username = input("Username: ")

    password = arguments.password
    if password is None:
        password = getpass.getpass("Password: ")

    session = authenticate(AUTH_URL, username, password)

    course_links = query_courses(session)
    for course_name, course_link in course_links.items():
        print(course_name)
        # Course titles come from the server as well and may contain path
        # separators, so they get the same treatment as document names.
        course_directory = escape_path(course_name)
        for path, link in query_documents(session, course_link):
            print(path, link)
            sync_file(
                session,
                os.path.join(course_directory, path),
                link, arguments.directory)


if __name__ == "__main__":
    main()