def _first_keyword_match(candidates, keywords):
    """Return the first candidate lecture that matches any keyword, else None.

    Fields are tried in priority order (title, speaker, comment, internal);
    within a field the candidates are tried in order, and within a candidate
    the keywords in file-name order.  A keyword matches when it equals the
    field value exactly, or when its lower-cased form is a substring of the
    lower-cased, ASCII-folded field value.
    """
    # lower-case every keyword once instead of once per field and lecture
    lowered = [(keyword, keyword.lower()) for keyword in keywords]
    for field in ['title', 'speaker', 'comment', 'internal']:
        for lecture in candidates:
            value = lecture[field]
            for keyword, keyword_lower in lowered:
                # first test for exact match, else make it ASCII and try a
                # substring test
                # NOTE(review): assumes to_ascii() accepts every value stored
                # in these columns (e.g. NULL/None) -- confirm against helper
                if keyword == value or keyword_lower in str(to_ascii(value).lower()):
                    return lecture
    return None


def sort_file(filename, course=None, lectures=None):
    """Find the lecture(s) of a course that a video file name refers to.

    File names look like ``<handle>-<sorter>-<format>.mp4``; ``_`` and ``' '``
    are handled like ``-``.  Every token of the name is classified as

    * ``YYMMDD`` -> date of the lecture,
    * ``HHMM``   -> start time of the lecture,
    * anything else -> keyword, looked up in title, speaker, comment and
      internal of the lectures.

    Args:
        filename: name of the file (without directory).
        course: course row to sort into.  When falsy, the course is looked up
            by handle, which is taken from the first two tokens of the name.
        lectures: lecture rows of the course.  When falsy, they are queried
            using ``course['id']``.

    Returns:
        A list of lecture rows.  A list with exactly one element is an
        unambiguous match.  An empty list means no course or no candidate was
        found; more than one element means the name was ambiguous (if keywords
        were present but none matched, this is the unreduced candidate list).
    """
    # '_' and ' ' are handled like '-'
    tokens = filename.replace('_', '-').replace(' ', '-').split('-')
    if not course:
        handle = '-'.join(tokens[:2])
        courses = query('SELECT * FROM courses WHERE handle = ?', handle)
        if not courses:
            return []
        course = courses[0]
    if not lectures:
        lectures = query('SELECT * from lectures where course_id = ?', course['id'])

    # all data extracted from the file name is collected in this dict
    data = {'keywords': []}
    for token in tokens:
        token = token.replace('.mp4', '')
        if not token:
            # Doubled separators ("a - b", "a--b") yield empty tokens.  An
            # empty keyword is a substring of every string and would "match"
            # the first lecture tested, so it must not become a keyword.
            continue
        try:
            if len(token) == 6:
                data['date'] = datetime.strptime(token, '%y%m%d').date()
            elif len(token) == 4:
                data['time'] = datetime.strptime(token, '%H%M').time()
            else:
                data['keywords'].append(token)
        except ValueError:
            # if it is not a date or time, handle it as keyword
            data['keywords'].append(token)

    # first try date and time (if one of them is set)
    matches = []
    if 'date' in data or 'time' in data:
        for lecture in lectures:
            if 'time' not in lecture or not lecture['time']:
                continue
            if 'date' in data and lecture['time'].date() != data['date']:
                continue
            if 'time' in data and lecture['time'].time() != data['time']:
                continue
            matches.append(lecture)

    # if we can't match exactly based on date and time, we have to match
    # keywords
    if len(matches) != 1 and data['keywords']:
        # only test lectures with the correct date/time, if we have any;
        # else test all lectures of this course
        candidates = matches if matches else list(lectures)
        hit = _first_keyword_match(candidates, data['keywords'])
        matches = [hit] if hit is not None else candidates

    # now we should have found exactly one match
    return matches
"sorter" musst be one or more of the following types: (inside the loop) - - # '_' and ' ' are handled like '-' - splitfilename = filename.replace('_','-').replace(' ','-').split('-') - if not os.path.splitext(filename)[1] == '.mp4': + if not os.path.splitext(filename)[1] == '.mp4': continue - # we save all extraced data in a dict - data = {'keywords': []} - # parse the file name and save all data in 'data' - for s in splitfilename: - s = s.replace('.mp4','') - #-<YYMMDD> (date) - #-<HHMM> (time) - #-<keyword> - # Looking for keywords in: title,speaker,comment, comma seperated list in internal - try: - if len(s) == 6: - data['date'] = datetime.strptime(s,'%y%m%d').date() - elif len(s) == 4: - data['time'] = datetime.strptime(s,'%H%M').time() - else: - data['keywords'].append(s) - except ValueError: - # if its not a date or time, handle it as keyword - data['keywords'].append(s) - # try to match the file on a single lecture - matches = [] - - # first try date and time (if one of them is set) - if ('date' in data) or ('time' in data): - for lecture in lectures: - if not ('time' in lecture) or not lecture['time']: - continue - if ('date' in data) and (lecture['time'].date() != data['date']): - continue - if ('time' in data) and (lecture['time'].time() != data['time']): - continue - matches.append(lecture) - # if we can't match exactly based on date and time, we have to match keywords - if ((len(matches) != 1) and (len(data['keywords']) > 0)): - #only test lectures with the correct date/time, if we have any. 
Else test for matches in all lectures of this course - if len(matches) == 0: - matches.extend(lectures) - found = False - for field in ['title','speaker','comment','internal']: - for lecture in matches: - for keyword in data['keywords']: - # first test for exact match, else make it asci and try substring test - if (keyword == lecture[field]) or \ - (str(keyword).lower() in str(to_ascii(lecture[field]).lower())): - found = True - matches = [lecture] - if found: - break - if found: - break - if found: - break - # now we should have found exactly one match + matches = sort_file(filename, course=course, lectures=lectures) dbfilepath = mountpoint['prefix']+course['handle']+'/'+filename if len(matches) == 1: # now match the format + splitfilename = filename.replace('_','-').replace(' ','-').split('-') + # default format is "unknown", with id 0 + fmt = 0 for videoformat in formats: #we match the last part of the file name without the extension formatstring = splitfilename[-1].split('.',1)[0].lower() if formatstring in videoformat['keywords'].replace(',',' ').split(' '): - data['format'] = videoformat['id'] + fmt = videoformat['id'] break - # default format is "unknown", with id 0 - if not 'format' in data: - data['format'] = 0 # insert the video into videos_data and log - insert_video( matches[0]['id'], dbfilepath, filepath, data['format']) + insert_video( matches[0]['id'], dbfilepath, filepath, fmt) else: # if we couldn't match the video on exactly one lecture, log an error matches_id = []