From 990ec4e15daea10c10ce32a3a63b3f47f04fbc42 Mon Sep 17 00:00:00 2001
From: Robin Sonnabend <robin@fsmpi.rwth-aachen.de>
Date: Wed, 22 Mar 2017 23:58:07 +0100
Subject: [PATCH] Also categorize by number

---
 .gitignore |  2 ++
 create.py  | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..56a5adf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+raw/
+by-*/
diff --git a/create.py b/create.py
index c796e01..1343ec1 100755
--- a/create.py
+++ b/create.py
@@ -23,6 +23,11 @@ PATTERN = r'<li>\s*<div[^>]*>(?<title>[^<]+)<\/div>\s*<div[^>]*><a href="(?<link
 
 RAW_DIR = "raw"
 TITLE_DIR = "by-title"
+NUMBER_DIR = "by-number"
+
+def prepare_title(title):  # Path-friendly name: lowercase, "/" -> "-", " " -> "_", drop "(" and ")", ä/ö/ü -> ae/oe/ue.
+    return (title.lower().replace("/", "-").replace(" ", "_").replace("(", "").replace(")", "")
+        .replace("ä", "ae").replace("ö", "oe").replace("ü", "ue"))
 
 def main():
     expr = re.compile(PATTERN)
@@ -33,23 +38,28 @@ def main():
             index = response.read().decode("iso-8859-1")
         os.makedirs(RAW_DIR, exist_ok=True)
         os.makedirs(TITLE_DIR, exist_ok=True)
+        os.makedirs(NUMBER_DIR, exist_ok=True)
         for match in expr.finditer(index):
             title = match.group("title")
             link = match.group("link")
             date = datetime.strptime(match.group("date"), "%d.%m.%Y")
             number = match.group("number")
             version = match.group("version")
-            print("Retrieving '{}' ({}) ...".format(title, version))
             local_name = "{}.pdf".format(number.replace("/", "-"))
             raw_file_name = os.path.abspath(os.path.join(RAW_DIR, local_name))
-            title_dir = os.path.join(TITLE_DIR, title.replace("/", "-"))
+            title_dir = os.path.join(TITLE_DIR, prepare_title(title))
             title_file_name = os.path.join(title_dir, f"{date.strftime('%Y-%m-%d')}-{version}.pdf")
+            number_file_name = os.path.join(NUMBER_DIR, prepare_title(f"{number}.pdf"))
             if not os.path.exists(raw_file_name):
+                print("Retrieving '{}' ({}) ...".format(title, version))
                 urllib.request.urlretrieve(baseurl + link, raw_file_name)
             os.makedirs(title_dir, exist_ok=True)
             if os.path.exists(title_file_name):
                 os.remove(title_file_name)
             os.symlink(raw_file_name, title_file_name)
+            if os.path.exists(number_file_name):
+                os.remove(number_file_name)
+            os.symlink(raw_file_name, number_file_name)
 
 if __name__ == "__main__":
     exit(main())
-- 
GitLab