diff --git a/sorter.py b/sorter.py index 1a1df26de5c83cf1104a17c10779056a894ea138..f48b0b4f2b08eca3599da991b0c2dcfb4cee869f 100644 --- a/sorter.py +++ b/sorter.py @@ -73,7 +73,11 @@ def insert_transcoded_video(jobid, jobtype, data, state, status): return insert_video(data['lecture_id'], data['output']['path'], data['format_id'], status['hash'], status['filesize'], status['duration'], data['source_id'] ) -def parse_file_name(splitFileName): +def split_filename(filename): + # '_' and ' ' are handled like '-' + return filename.replace('_','-').replace(' ','-').split('-') + +def parse_filename(splitFileName): # filenames: <handle>-<sorter>-<format>.mp4 data = {'keywords': []} for fileNameChunk in splitFileName: @@ -117,19 +121,21 @@ def filter_lectures_by_keywords(lectures, keywords): return [lecture] return [] +def extract_format_keyword_from_filename(splitFileName): + return splitFileName[-1].split('.',1)[0].lower() + def filter_formats_by_filename(splitFileName): - # default format is "unknown", with id 0 + formatstring = extract_format_keyword_from_filename(splitFileName) formats = query('SELECT * FROM formats ORDER BY prio DESC') for videoformat in formats: # we match the last part of the file name without the extension - formatstring = splitFileName[-1].split('.',1)[0].lower() if formatstring in videoformat['keywords'].replace(',',' ').split(' '): return videoformat['id'] + # default format is "unknown", with id 0 return 0 def sort_file(filename, course=None, lectures=None): - # '_' and ' ' are handled like '-' - splitFileName = filename.replace('_','-').replace(' ','-').split('-') + splitFileName = split_filename(filename) if not course: handle = splitFileName[0] if splitFileName[0].endswith('ws') or splitFileName[0].endswith('ss'): @@ -141,7 +147,7 @@ def sort_file(filename, course=None, lectures=None): if not lectures: lectures = query('SELECT * from lectures where course_id = ?', course['id']) # parse all data from the file name - data = parse_file_name(splitFileName) + data = parse_filename(splitFileName) # try to match the file on a single lecture matches = filter_lectures_by_datetime(lectures, data.get('date'), data.get('time')) # if we can't match exactly based on date and time, we have to match keywords