Commit c630d5c9 authored by Robin Sonnabend

Add document sync

parent 2a79ac08
__pycache__/
documents/
#!/usr/bin/env python3
import urllib
import os
from datetime import datetime, timezone
import requests
import bs4
from shib_client import authenticate
# Shibboleth login endpoint of the RWTH Moodle; passed to authenticate() in main().
AUTH_URL = "https://moodle.rwth-aachen.de/auth/shibboleth/index.php"
# Personal overview page; query_courses() scrapes the course list from it.
INDEX_URL = "https://moodle.rwth-aachen.de/my/"
def query_courses(session):
    """Return a mapping of course title to course URL.

    The overview page at ``INDEX_URL`` is fetched through *session* and
    every ``div.coc-course`` inside ``div#coc-courselist`` contributes its
    first anchor carrying an ``href``.

    Args:
        session: Object with a ``get(url)`` method whose result exposes the
            page body as ``.text`` (presumably a ``requests.Session``
            returned by ``authenticate`` -- confirm against shib_client).

    Returns:
        dict mapping the anchor's ``title`` attribute (or, if absent, its
        visible text) to its ``href``.

    Raises:
        RuntimeError: If the course list container is not present on the
            page.
    """
    content = session.get(INDEX_URL).text
    soup = bs4.BeautifulSoup(content, "html.parser")
    course_list = soup.find("div", id="coc-courselist")
    if course_list is None:
        # Without this guard the failure is an opaque AttributeError on None.
        raise RuntimeError(
            "course list (div#coc-courselist) not found on the Moodle "
            "overview page; the login may have failed or the page layout "
            "may have changed")
    course_links = {}
    for course in course_list.find_all("div", class_="coc-course"):
        anchor = course.find("a", href=True)
        if anchor is None:
            # A course entry without a link cannot be synced; skip it.
            continue
        title = anchor.get("title") or anchor.get_text(strip=True)
        if not title:
            continue
        course_links[title] = anchor["href"]
    return course_links
def escape_path(path_part):
    """Return *path_part* made safe for use as a single path component.

    Prefixing a separator with a backslash does not stop the operating
    system from treating it as a separator, so every path separator
    (``os.path.sep`` and, where defined, ``os.path.altsep``) is replaced
    by an underscore instead.  The special names ``""``, ``"."`` and
    ``".."`` are replaced by underscores as well, so a scraped title can
    neither vanish from the path nor point at a parent directory.

    Args:
        path_part: Text intended to become one file or directory name.

    Returns:
        The sanitised name; it never contains a path separator.
    """
    separators = [os.path.sep]
    if os.path.altsep:
        separators.append(os.path.altsep)
    for separator in separators:
        path_part = path_part.replace(separator, "_")
    if path_part in ("", ".", ".."):
        return "_" * max(len(path_part), 1)
    return path_part
def _instance_title(container):
    """Return the escaped title from *container*'s ``instancename`` span.

    Only the span's direct string children are joined; nested tags are
    left out.  Returns ``None`` when *container* has no such span.
    """
    label = container.find("span", class_="instancename")
    if label is None:
        return None
    title = "".join(
        text for text in label.contents if isinstance(text, str))
    return escape_path(title)


def query_documents(session, link, _ancestors=frozenset()):
    """Yield ``(relative_path, url)`` for every document reachable from *link*.

    The page at *link* is scanned for three kinds of entries:

    * ``li.folder`` -- the linked page is scanned recursively and its
      results are yielded below a directory named after the folder;
    * ``span.fp-filename-icon`` -- a file entry, named by its
      ``span.fp-filename`` text;
    * ``li.resource`` -- a resource, named by its ``instancename`` span.

    Entries lacking a link or a name are skipped instead of aborting the
    whole scan.  Progress is printed to stdout.

    Args:
        session: Object with a ``get(url)`` method whose result exposes the
            page body as ``.text``.
        link: URL of the page to scan.
        _ancestors: Internal.  Links currently being scanned further up
            the recursion, used to stop a folder that links back to one of
            its own ancestors from recursing forever.

    Yields:
        Tuples of (path relative to the scanned page, download URL).
    """
    if link in _ancestors:
        return
    chain = _ancestors | {link}
    content = session.get(link).text
    soup = bs4.BeautifulSoup(content, "html.parser")

    for folder in soup.find_all("li", class_="folder"):
        anchor = folder.find("a", href=True)
        title = _instance_title(anchor) if anchor is not None else None
        if title is None:
            continue
        print("folder", title)
        for name, href in query_documents(session, anchor["href"], chain):
            yield os.path.join(title, name), href

    for span in soup.find_all("span", class_="fp-filename-icon"):
        anchor = span.find("a", href=True)
        label = span.find("span", class_="fp-filename")
        if anchor is None or label is None:
            continue
        name = escape_path(label.text)
        print("document", name)
        yield name, anchor["href"]

    for element in soup.find_all("li", class_="resource"):
        div = element.find("div", class_="activityinstance")
        anchor = div.find("a", href=True) if div is not None else None
        title = _instance_title(div) if anchor is not None else None
        if title is None:
            continue
        print("resource", title)
        yield title, anchor["href"]
def sync_file(session, filename, url, directory):
    """Download *url* to ``directory/filename`` unless the local copy is current.

    When the target file already exists, its modification time is sent as
    an ``If-Modified-Since`` header so the server can answer 304 instead
    of sending the body again.

    Args:
        session: Object with a ``get(url, headers=...)`` method whose
            result exposes ``status_code`` and ``content`` (bytes).
        filename: Path of the file relative to *directory*.
        url: Address to download from.
        directory: Root directory of the local copy.

    Returns:
        None.  The file is (re)written only on a 200 response; a 304
        leaves it untouched and any other status prints a warning.
    """
    # Local import, matching this file's habit of importing rarely used
    # modules inside the function that needs them.
    from email.utils import formatdate

    path = os.path.join(directory, filename)
    headers = {}
    try:
        modified = os.path.getmtime(path)
    except FileNotFoundError:
        pass
    else:
        # formatdate() emits the HTTP date format in GMT regardless of the
        # process locale, so the global LC_TIME setting is left alone.
        headers["If-Modified-Since"] = formatdate(modified, usegmt=True)

    result = session.get(url, headers=headers)
    if result.status_code == 304:
        return
    if result.status_code != 200:
        print(f"warning: unexpected status {result.status_code} for {url}")
        return

    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # Write to a temporary name first: an interrupted write must not leave
    # a truncated file whose fresh mtime would suppress later downloads.
    partial_path = path + ".part"
    with open(partial_path, "wb") as file:
        file.write(result.content)
    os.replace(partial_path, path)
def main():
    """Command-line entry point: log in and sync every course's documents.

    Options ``--username``, ``--password`` and ``--directory`` (default
    ``documents``) are read from the command line; a missing username or
    password is asked for interactively.  After authenticating against
    ``AUTH_URL``, each course returned by ``query_courses`` is scanned
    with ``query_documents`` and every document found is passed to
    ``sync_file`` under ``<directory>/<course>/<path>``.
    """
    import argparse
    parser = argparse.ArgumentParser("RWTH Moodle Sync")
    parser.add_argument("--username")
    parser.add_argument("--password")
    parser.add_argument("--directory", default="documents")
    arguments = parser.parse_args()

    username = arguments.username
    if username is None:
        username = input("Username: ")
    password = arguments.password
    if password is None:
        import getpass
        password = getpass.getpass("Password: ")

    session = authenticate(AUTH_URL, username, password)
    course_links = query_courses(session)
    for course_name, course_link in course_links.items():
        print(course_name)
        # Course titles are scraped text like folder and file titles, so
        # they get the same escaping before becoming a directory name.
        course_directory = escape_path(course_name)
        for path, link in query_documents(session, course_link):
            print(path, link)
            sync_file(
                session,
                os.path.join(course_directory, path),
                link, arguments.directory)
# Run the sync only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment