From aa180d51df507c527f1f16b55968dde123b27b0f Mon Sep 17 00:00:00 2001
From: Andreas <andreasv@fsmpi.rwth-aachen.de>
Date: Thu, 19 Jul 2018 14:45:52 +0200
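Subject: [PATCH] cleaned up sorter

Move the filename splitting out of sort_file() into a new
split_filename() helper and rename parse_file_name() to
parse_filename().

Add extract_format_keyword_from_filename() and use it in
filter_formats_by_filename(), so the format keyword is computed once
before the loop over the formats instead of on every iteration. The
comment about the default format (id 0) now sits next to the fallback
return.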
---
 sorter.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/sorter.py b/sorter.py
index 1a1df26..f48b0b4 100644
--- a/sorter.py
+++ b/sorter.py
@@ -73,7 +73,11 @@ def insert_transcoded_video(jobid, jobtype, data, state, status):
 		return
 	insert_video(data['lecture_id'], data['output']['path'], data['format_id'], status['hash'], status['filesize'], status['duration'], data['source_id'] )
 
-def parse_file_name(splitFileName):
+def split_filename(filename):
+	# '_' and ' ' are handled like '-'
+	return filename.replace('_','-').replace(' ','-').split('-')
+
+def parse_filename(splitFileName):
 	# filenames: <handle>-<sorter>-<format>.mp4
 	data = {'keywords': []}
 	for fileNameChunk in splitFileName:
@@ -117,19 +121,21 @@ def filter_lectures_by_keywords(lectures, keywords):
 					return [lecture]
 	return []
 
+def extract_format_keyword_from_filename(splitFileName):
+	return splitFileName[-1].split('.',1)[0].lower()
+
 def filter_formats_by_filename(splitFileName):
-	# default format is "unknown", with id 0
+	formatstring = extract_format_keyword_from_filename(splitFileName)
 	formats = query('SELECT * FROM formats ORDER BY prio DESC')
 	for videoformat in formats:
 		# we match the last part of the file name without the extension
-		formatstring = splitFileName[-1].split('.',1)[0].lower()
 		if formatstring in videoformat['keywords'].replace(',',' ').split(' '):
 			return videoformat['id']
+	# default format is "unknown", with id 0
 	return 0
 
 def sort_file(filename, course=None, lectures=None):
-	# '_' and ' ' are handled like '-'
-	splitFileName = filename.replace('_','-').replace(' ','-').split('-')
+	splitFileName = split_filename(filename)
 	if not course:
 		handle = splitFileName[0]
 		if splitFileName[0].endswith('ws') or splitFileName[0].endswith('ss'):
@@ -141,7 +147,7 @@ def sort_file(filename, course=None, lectures=None):
 	if not lectures:
 		lectures = query('SELECT * from lectures where course_id = ?', course['id'])
 	# parse all data from the file name
-	data = parse_file_name(splitFileName)
+	data = parse_filename(splitFileName)
 	# try to match the file on a single lecture
 	matches = filter_lectures_by_datetime(lectures, data.get('date'), data.get('time'))
 	# if we can't match exactly  based on date and time, we have to match keywords
-- 
GitLab