Skip to content
Snippets Groups Projects
Commit c630d5c9 authored by Robin Sonnabend
Browse files

Add document sync

parent 2a79ac08
Branches
No related tags found
No related merge requests found
__pycache__/
documents/
#!/usr/bin/env python3
import urllib
import os
from datetime import datetime, timezone
import requests
import bs4
from shib_client import authenticate
# Shibboleth login URL of the RWTH Moodle; main() hands it to authenticate().
AUTH_URL = "https://moodle.rwth-aachen.de/auth/shibboleth/index.php"
# Page that query_courses() downloads and scrapes for the course list.
INDEX_URL = "https://moodle.rwth-aachen.de/my/"
def query_courses(session):
    """Return a ``{course title: course URL}`` mapping scraped from INDEX_URL.

    The page is fetched through *session* and parsed with BeautifulSoup.
    Every ``div.coc-course`` inside the ``div#coc-courselist`` container
    contributes the ``title`` and ``href`` attributes of its first anchor
    that carries an ``href``.  If two courses share a title, the later one
    overwrites the earlier one.
    """
    page = bs4.BeautifulSoup(session.get(INDEX_URL).text, "html.parser")
    container = page.find("div", id="coc-courselist")
    entries = container.find_all("div", class_="coc-course")

    # Resolve every anchor first, then build the mapping from them.
    anchors = [entry.find("a", href=True) for entry in entries]

    links = {}
    for anchor in anchors:
        course_title = anchor["title"]
        links[course_title] = anchor["href"]
    return links
def escape_path(path_part):
    """Return *path_part* made safe for use as a single path component.

    Every path separator (``os.path.sep`` and, where the platform defines
    one, ``os.path.altsep``) is replaced by an underscore.

    Prefixing the separator with a backslash does not work: the separator
    character is still present, so ``os.path.join``/``open`` keep treating
    the name as several nested components, and on platforms whose separator
    *is* the backslash it merely doubles it.

    :param path_part: a title or file name taken from a scraped page.
    :returns: the name with all path separators replaced by ``"_"``.
    """
    separators = (os.path.sep, os.path.altsep)
    for separator in separators:
        # os.path.altsep is None on platforms with a single separator.
        if separator:
            path_part = path_part.replace(separator, "_")
    return path_part
def query_documents(session, link):
    """Yield ``(relative_path, url)`` pairs for the documents found at *link*.

    The page behind *link* is fetched through *session* and scanned in three
    passes, in this order:

    1. ``li.folder`` items: the page behind the item's first anchor is
       scanned recursively and every result is prefixed with the folder
       title.
    2. ``span.fp-filename-icon`` items: the name comes from the nested
       ``span.fp-filename``, the URL from the nested anchor.
    3. ``li.resource`` items: the name comes from the ``span.instancename``
       inside ``div.activityinstance``, the URL from that div's anchor.

    Every name is passed through ``escape_path`` and printed as progress
    output before it is yielded.
    """

    def direct_text(container):
        # Join only the text nodes directly inside span.instancename;
        # nested tags are skipped.
        label = container.find("span", class_="instancename")
        pieces = [node for node in label.contents if isinstance(node, str)]
        return "".join(pieces)

    response = session.get(link)
    page = bs4.BeautifulSoup(response.text, "html.parser")

    for folder_item in page.find_all("li", class_="folder"):
        folder_anchor = folder_item.find("a", href=True)
        folder_title = escape_path(direct_text(folder_anchor))
        print("folder", folder_title)
        yield from (
            (os.path.join(folder_title, child_name), child_href)
            for child_name, child_href
            in query_documents(session, folder_anchor["href"])
        )

    for file_span in page.find_all("span", class_="fp-filename-icon"):
        file_href = file_span.find("a")["href"]
        file_name = escape_path(file_span.find("span", class_="fp-filename").text)
        print("document", file_name)
        yield file_name, file_href

    for resource_item in page.find_all("li", class_="resource"):
        instance = resource_item.find("div", class_="activityinstance")
        resource_href = instance.find("a", href=True)["href"]
        resource_title = escape_path(direct_text(instance))
        print("resource", resource_title)
        yield resource_title, resource_href
def sync_file(session, filename, url, directory):
    """Download *url* to ``directory/filename`` unless the local copy is current.

    If the target file already exists, its modification time is sent as an
    ``If-Modified-Since`` header.  The file is (re)written only when the
    response status is 200; any other status (for example 304) leaves the
    local file untouched.

    :param session: object with a ``get(url, headers=...)`` method whose
        result exposes ``status_code`` and ``content``.
    :param filename: target path relative to *directory*.
    :param url: address to download from.
    :param directory: base directory of the local copy.
    """
    # formatdate() always emits English day/month names, so the process-wide
    # locale does not have to be changed to build a valid HTTP date.
    from email.utils import formatdate

    path = os.path.join(directory, filename)
    headers = {}
    try:
        modified_at = os.path.getmtime(path)
    except FileNotFoundError:
        # No local copy yet: request the file unconditionally.
        pass
    else:
        headers["If-Modified-Since"] = formatdate(modified_at, usegmt=True)

    result = session.get(url, headers=headers)
    if result.status_code != 200:
        return

    parent = os.path.dirname(path)
    # dirname() is "" for a bare file name, and os.makedirs("") would raise.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "wb") as file:
        file.write(result.content)
def main():
    """Command-line entry point: log in and mirror every course's documents.

    Options ``--username``, ``--password`` and ``--directory`` (default
    ``documents``) are read from the command line; a missing user name or
    password is asked for interactively.  After authenticating against
    AUTH_URL, each course returned by ``query_courses`` is walked with
    ``query_documents`` and every document is handed to ``sync_file`` below
    ``<directory>/<course name>/``.
    """
    import argparse

    cli = argparse.ArgumentParser("RWTH Moodle Sync")
    cli.add_argument("--username")
    # NOTE(review): a password given on the command line may be visible to
    # other local users; the interactive prompt avoids that.
    cli.add_argument("--password")
    cli.add_argument("--directory", default="documents")
    options = cli.parse_args()

    username = options.username
    if username is None:
        username = input("Username: ")

    password = options.password
    if password is None:
        import getpass
        password = getpass.getpass("Password: ")

    session = authenticate(AUTH_URL, username, password)

    for course_name, course_link in query_courses(session).items():
        print(course_name)
        for path, link in query_documents(session, course_link):
            print(path, link)
            target = os.path.join(course_name, path)
            sync_file(session, target, link, options.directory)


# Run the sync only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment